diff --git a/contrib/ChangeLog b/contrib/ChangeLog index 1f02d959496..f062ea90738 100644 --- a/contrib/ChangeLog +++ b/contrib/ChangeLog @@ -1,3 +1,8 @@ +2015-01-15 Thomas Schwinge + + * gcc_update (files_and_dependencies): Update rules for new + libgomp/plugin/Makefrag.am and libgomp/plugin/configfrag.ac files. + 2015-01-12 Yury Gribov * check_GNU_style.sh: Support patches coming from stdin. diff --git a/contrib/gcc_update b/contrib/gcc_update index 46ef788e0ec..5ba3a057bf4 100755 --- a/contrib/gcc_update +++ b/contrib/gcc_update @@ -139,8 +139,10 @@ libcpp/aclocal.m4: libcpp/configure.ac libcpp/Makefile.in: libcpp/configure.ac libcpp/aclocal.m4 libcpp/configure: libcpp/configure.ac libcpp/aclocal.m4 libgomp/aclocal.m4: libgomp/configure.ac libgomp/acinclude.m4 +libgomp/Makefile.am: libgomp/plugin/Makefrag.am libgomp/Makefile.in: libgomp/Makefile.am libgomp/aclocal.m4 libgomp/testsuite/Makefile.in: libgomp/testsuite/Makefile.am libgomp/aclocal.m4 +libgomp/configure.ac: libgomp/plugin/configfrag.ac libgomp/configure: libgomp/configure.ac libgomp/aclocal.m4 libgomp/config.h.in: libgomp/configure.ac libgomp/aclocal.m4 libitm/aclocal.m4: libitm/configure.ac libitm/acinclude.m4 diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d40a3f0864e..3a27df9923f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,183 @@ +2015-01-15 Thomas Schwinge + Bernd Schmidt + Cesar Philippidis + James Norris + Tom de Vries + Ilmir Usmanov + Dmitry Bocharnikov + Evgeny Gavrin + Jakub Jelinek + + * builtin-types.def (BT_FN_VOID_INT_INT_VAR) + (BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR_INT_INT_VAR) + (BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR): + New function types. + * builtins.c: Include "gomp-constants.h". + (expand_builtin_acc_on_device): New function. + (expand_builtin, is_inexpensive_builtin): Handle + BUILT_IN_ACC_ON_DEVICE. + * builtins.def (DEF_GOACC_BUILTIN, DEF_GOACC_BUILTIN_COMPILER): + New macros. 
+ * cgraph.c (cgraph_node::create): Consider flag_openacc next to + flag_openmp. + * config.gcc (tm_file): Add nvptx/offload.h. + <*-intelmic-* | *-intelmicemul-*> (tm_file): Add + i386/intelmic-offload.h. + * gcc.c (LINK_COMMAND_SPEC, GOMP_SELF_SPECS): For -fopenacc, link + to libgomp and its dependencies. + * config/arc/arc.h (LINK_COMMAND_SPEC): Likewise. + * config/darwin.h (LINK_COMMAND_SPEC_A): Likewise. + * config/i386/mingw32.h (GOMP_SELF_SPECS): Likewise. + * config/ia64/hpux.h (LIB_SPEC): Likewise. + * config/pa/pa-hpux11.h (LIB_SPEC): Likewise. + * config/pa/pa64-hpux.h (LIB_SPEC): Likewise. + * doc/generic.texi: Update for OpenACC changes. + * doc/gimple.texi: Likewise. + * doc/invoke.texi: Likewise. + * doc/sourcebuild.texi: Likewise. + * gimple-pretty-print.c (dump_gimple_omp_for): Handle + GF_OMP_FOR_KIND_OACC_LOOP. + (dump_gimple_omp_target): Handle GF_OMP_TARGET_KIND_OACC_KERNELS, + GF_OMP_TARGET_KIND_OACC_PARALLEL, GF_OMP_TARGET_KIND_OACC_DATA, + GF_OMP_TARGET_KIND_OACC_UPDATE, + GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA. + Dump more data. + * gimple.c: Update comments for OpenACC changes. + * gimple.def: Likewise. + * gimple.h: Likewise. + (enum gf_mask): Add GF_OMP_FOR_KIND_OACC_LOOP, + GF_OMP_TARGET_KIND_OACC_PARALLEL, GF_OMP_TARGET_KIND_OACC_KERNELS, + GF_OMP_TARGET_KIND_OACC_DATA, GF_OMP_TARGET_KIND_OACC_UPDATE, + GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA. + (gimple_omp_for_cond, gimple_omp_for_set_cond): Sort in the + appropriate place. + (is_gimple_omp_oacc, is_gimple_omp_offloaded): New functions. + * gimplify.c: Include "gomp-constants.h". + Update comments for OpenACC changes. + (is_gimple_stmt): Handle OACC_PARALLEL, OACC_KERNELS, OACC_DATA, + OACC_HOST_DATA, OACC_DECLARE, OACC_UPDATE, OACC_ENTER_DATA, + OACC_EXIT_DATA, OACC_CACHE, OACC_LOOP. 
+ (gimplify_scan_omp_clauses, gimplify_adjust_omp_clauses): Handle + OMP_CLAUSE__CACHE_, OMP_CLAUSE_ASYNC, OMP_CLAUSE_WAIT, + OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, + OMP_CLAUSE_VECTOR_LENGTH, OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, + OMP_CLAUSE_VECTOR, OMP_CLAUSE_DEVICE_RESIDENT, + OMP_CLAUSE_USE_DEVICE, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO, + OMP_CLAUSE_SEQ. + (gimplify_adjust_omp_clauses_1, gimplify_adjust_omp_clauses): Use + GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. Use + OMP_CLAUSE_SET_MAP_KIND. + (gimplify_oacc_cache): New function. + (gimplify_omp_for): Handle OACC_LOOP. + (gimplify_omp_workshare): Handle OACC_KERNELS, OACC_PARALLEL, + OACC_DATA. + (gimplify_omp_target_update): Handle OACC_ENTER_DATA, + OACC_EXIT_DATA, OACC_UPDATE. + (gimplify_expr): Handle OACC_LOOP, OACC_CACHE, OACC_HOST_DATA, + OACC_DECLARE, OACC_KERNELS, OACC_PARALLEL, OACC_DATA, + OACC_ENTER_DATA, OACC_EXIT_DATA, OACC_UPDATE. + (gimplify_body): Consider flag_openacc next to flag_openmp. + * lto-streamer-out.c: Include "gomp-constants.h". + * omp-builtins.def (BUILT_IN_ACC_GET_DEVICE_TYPE) + (BUILT_IN_GOACC_DATA_START, BUILT_IN_GOACC_DATA_END) + (BUILT_IN_GOACC_ENTER_EXIT_DATA, BUILT_IN_GOACC_PARALLEL) + (BUILT_IN_GOACC_UPDATE, BUILT_IN_GOACC_WAIT) + (BUILT_IN_GOACC_GET_THREAD_NUM, BUILT_IN_GOACC_GET_NUM_THREADS) + (BUILT_IN_ACC_ON_DEVICE): New builtins. + * omp-low.c: Include "gomp-constants.h". + Update comments for OpenACC changes. + (struct omp_context): Add reduction_map, gwv_below, gwv_this + members. + (extract_omp_for_data, use_pointer_for_field, install_var_field) + (new_omp_context, delete_omp_context, scan_sharing_clauses) + (create_omp_child_function, scan_omp_for, scan_omp_target) + (check_omp_nesting_restrictions, lower_reduction_clauses) + (build_omp_regions_1, diagnose_sb_0, make_gimple_omp_edges): + Update for OpenACC changes. 
+ (scan_sharing_clauses): Handle OMP_CLAUSE_NUM_GANGS, + OMP_CLAUSE_NUM_WORKERS, OMP_CLAUSE_VECTOR_LENGTH, + OMP_CLAUSE_ASYNC, OMP_CLAUSE_WAIT, OMP_CLAUSE_GANG, + OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_DEVICE_RESIDENT, + OMP_CLAUSE_USE_DEVICE, OMP_CLAUSE__CACHE_, OMP_CLAUSE_INDEPENDENT, + OMP_CLAUSE_AUTO, OMP_CLAUSE_SEQ. Use GOMP_MAP_* instead of + OMP_CLAUSE_MAP_*. + (expand_omp_for_static_nochunk, expand_omp_for_static_chunk): + Handle GF_OMP_FOR_KIND_OACC_LOOP. + (expand_omp_target, lower_omp_target): Handle + GF_OMP_TARGET_KIND_OACC_PARALLEL, GF_OMP_TARGET_KIND_OACC_KERNELS, + GF_OMP_TARGET_KIND_OACC_UPDATE, + GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA, + GF_OMP_TARGET_KIND_OACC_DATA. + (pass_expand_omp::execute, execute_lower_omp) + (pass_diagnose_omp_blocks::gate): Consider flag_openacc next to + flag_openmp. + (offload_symbol_decl): New variable. + (oacc_get_reduction_array_id, oacc_max_threads) + (get_offload_symbol_decl, get_base_type, lookup_oacc_reduction) + (maybe_lookup_oacc_reduction, enclosing_target_ctx) + (oacc_loop_or_target_p, oacc_lower_reduction_var_helper) + (oacc_gimple_assign, oacc_initialize_reduction_data) + (oacc_finalize_reduction_data, oacc_process_reduction_data): New + functions. + (is_targetreg_ctx): Remove function. + * tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__CACHE_, + OMP_CLAUSE_DEVICE_RESIDENT, OMP_CLAUSE_USE_DEVICE, + OMP_CLAUSE_GANG, OMP_CLAUSE_ASYNC, OMP_CLAUSE_WAIT, + OMP_CLAUSE_AUTO, OMP_CLAUSE_SEQ, OMP_CLAUSE_INDEPENDENT, + OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_NUM_GANGS, + OMP_CLAUSE_NUM_WORKERS, OMP_CLAUSE_VECTOR_LENGTH. + * tree.c (omp_clause_code_name, walk_tree_1): Update accordingly. + * tree.h (OMP_CLAUSE_GANG_EXPR, OMP_CLAUSE_GANG_STATIC_EXPR) + (OMP_CLAUSE_ASYNC_EXPR, OMP_CLAUSE_WAIT_EXPR) + (OMP_CLAUSE_VECTOR_EXPR, OMP_CLAUSE_WORKER_EXPR) + (OMP_CLAUSE_NUM_GANGS_EXPR, OMP_CLAUSE_NUM_WORKERS_EXPR) + (OMP_CLAUSE_VECTOR_LENGTH_EXPR): New macros. 
+ * tree-core.h: Update comments for OpenACC changes. + (enum omp_clause_map_kind): Remove. + (struct tree_omp_clause): Change type of map_kind member from enum + omp_clause_map_kind to unsigned char. + * tree-inline.c: Update comments for OpenACC changes. + * tree-nested.c: Likewise. Include "gomp-constants.h". + (convert_nonlocal_reference_stmt, convert_local_reference_stmt) + (convert_tramp_reference_stmt, convert_gimple_call): Update for + OpenACC changes. Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. Use + OMP_CLAUSE_SET_MAP_KIND. + * tree-pretty-print.c: Include "gomp-constants.h". + (dump_omp_clause): Handle OMP_CLAUSE_DEVICE_RESIDENT, + OMP_CLAUSE_USE_DEVICE, OMP_CLAUSE__CACHE_, OMP_CLAUSE_GANG, + OMP_CLAUSE_ASYNC, OMP_CLAUSE_AUTO, OMP_CLAUSE_SEQ, + OMP_CLAUSE_WAIT, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, + OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, + OMP_CLAUSE_VECTOR_LENGTH, OMP_CLAUSE_INDEPENDENT. Use GOMP_MAP_* + instead of OMP_CLAUSE_MAP_*. + (dump_generic_node): Handle OACC_PARALLEL, OACC_KERNELS, + OACC_DATA, OACC_HOST_DATA, OACC_DECLARE, OACC_UPDATE, + OACC_ENTER_DATA, OACC_EXIT_DATA, OACC_CACHE, OACC_LOOP. + * tree-streamer-in.c: Include "gomp-constants.h". + (unpack_ts_omp_clause_value_fields): Use GOMP_MAP_* instead of + OMP_CLAUSE_MAP_*. Use OMP_CLAUSE_SET_MAP_KIND. + * tree-streamer-out.c: Include "gomp-constants.h". + (pack_ts_omp_clause_value_fields): Use GOMP_MAP_* instead of + OMP_CLAUSE_MAP_*. + * tree.def (OACC_PARALLEL, OACC_KERNELS, OACC_DATA) + (OACC_HOST_DATA, OACC_LOOP, OACC_CACHE, OACC_DECLARE) + (OACC_ENTER_DATA, OACC_EXIT_DATA, OACC_UPDATE): New tree codes. + * tree.c (omp_clause_num_ops): Update accordingly. + * tree.h (OMP_BODY, OMP_CLAUSES, OMP_LOOP_CHECK, OMP_CLAUSE_SIZE): + Likewise. 
+ (OACC_PARALLEL_BODY, OACC_PARALLEL_CLAUSES, OACC_KERNELS_BODY) + (OACC_KERNELS_CLAUSES, OACC_DATA_BODY, OACC_DATA_CLAUSES) + (OACC_HOST_DATA_BODY, OACC_HOST_DATA_CLAUSES, OACC_CACHE_CLAUSES) + (OACC_DECLARE_CLAUSES, OACC_ENTER_DATA_CLAUSES) + (OACC_EXIT_DATA_CLAUSES, OACC_UPDATE_CLAUSES) + (OACC_KERNELS_COMBINED, OACC_PARALLEL_COMBINED): New macros. + * tree.h (OMP_CLAUSE_MAP_KIND): Cast it to enum gomp_map_kind. + (OMP_CLAUSE_SET_MAP_KIND): New macro. + * varpool.c (varpool_node::get_create): Consider flag_openacc next + to flag_openmp. + * config/i386/intelmic-offload.h: New file. + * config/nvptx/offload.h: Likewise. + 2015-01-15 Prathamesh Kulkarni * explow.h: Remove duplicate contents. diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index 93efb49ff0f..c130f7d22d4 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,8 @@ +2015-01-15 Thomas Schwinge + + * gcc-interface/utils.c (DEF_FUNCTION_TYPE_VAR_8) + (DEF_FUNCTION_TYPE_VAR_12): New macros. + 2015-01-09 Michael Collison * gcc-interface/cuintp.c: Include hash-set.h, machmode.h, diff --git a/gcc/ada/gcc-interface/utils.c b/gcc/ada/gcc-interface/utils.c index b62c7d4c4a3..477e39bf14c 100644 --- a/gcc/ada/gcc-interface/utils.c +++ b/gcc/ada/gcc-interface/utils.c @@ -5339,6 +5339,12 @@ enum c_builtin_type #define DEF_FUNCTION_TYPE_VAR_4(NAME, RETURN, ARG1, ARG2, ARG3, ARG4) NAME, #define DEF_FUNCTION_TYPE_VAR_5(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \ NAME, +#define DEF_FUNCTION_TYPE_VAR_8(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8) \ + NAME, +#define DEF_FUNCTION_TYPE_VAR_12(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8, ARG9, ARG10, ARG11, ARG12) \ + NAME, #define DEF_POINTER_TYPE(NAME, TYPE) NAME, #include "builtin-types.def" #undef DEF_PRIMITIVE_TYPE @@ -5357,6 +5363,8 @@ enum c_builtin_type #undef DEF_FUNCTION_TYPE_VAR_3 #undef DEF_FUNCTION_TYPE_VAR_4 #undef DEF_FUNCTION_TYPE_VAR_5 +#undef DEF_FUNCTION_TYPE_VAR_8 +#undef DEF_FUNCTION_TYPE_VAR_12 
#undef DEF_POINTER_TYPE BT_LAST }; @@ -5462,6 +5470,14 @@ install_builtin_function_types (void) def_fn_type (ENUM, RETURN, 1, 4, ARG1, ARG2, ARG3, ARG4); #define DEF_FUNCTION_TYPE_VAR_5(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \ def_fn_type (ENUM, RETURN, 1, 5, ARG1, ARG2, ARG3, ARG4, ARG5); +#define DEF_FUNCTION_TYPE_VAR_8(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8) \ + def_fn_type (ENUM, RETURN, 1, 8, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, \ + ARG7, ARG8); +#define DEF_FUNCTION_TYPE_VAR_12(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8, ARG9, ARG10, ARG11, ARG12) \ + def_fn_type (ENUM, RETURN, 1, 12, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, \ + ARG7, ARG8, ARG9, ARG10, ARG11, ARG12); #define DEF_POINTER_TYPE(ENUM, TYPE) \ builtin_types[(int) ENUM] = build_pointer_type (builtin_types[(int) TYPE]); @@ -5483,6 +5499,8 @@ install_builtin_function_types (void) #undef DEF_FUNCTION_TYPE_VAR_3 #undef DEF_FUNCTION_TYPE_VAR_4 #undef DEF_FUNCTION_TYPE_VAR_5 +#undef DEF_FUNCTION_TYPE_VAR_8 +#undef DEF_FUNCTION_TYPE_VAR_12 #undef DEF_POINTER_TYPE builtin_types[(int) BT_LAST] = NULL_TREE; } diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def index 34cec434fb0..3412677ef92 100644 --- a/gcc/builtin-types.def +++ b/gcc/builtin-types.def @@ -568,6 +568,8 @@ DEF_FUNCTION_TYPE_VAR_2 (BT_FN_INT_INT_CONST_STRING_VAR, BT_INT, BT_INT, BT_CONST_STRING) DEF_FUNCTION_TYPE_VAR_2 (BT_FN_PTR_CONST_PTR_SIZE_VAR, BT_PTR, BT_CONST_PTR, BT_SIZE) +DEF_FUNCTION_TYPE_VAR_2 (BT_FN_VOID_INT_INT_VAR, BT_VOID, + BT_INT, BT_INT) DEF_FUNCTION_TYPE_VAR_3 (BT_FN_INT_STRING_SIZE_CONST_STRING_VAR, BT_INT, BT_STRING, BT_SIZE, BT_CONST_STRING) @@ -586,6 +588,15 @@ DEF_FUNCTION_TYPE_VAR_5 (BT_FN_INT_STRING_SIZE_INT_SIZE_CONST_STRING_VAR, DEF_FUNCTION_TYPE_VAR_5 (BT_FN_INT_INT_INT_INT_INT_INT_VAR, BT_INT, BT_INT, BT_INT, BT_INT, BT_INT, BT_INT) +DEF_FUNCTION_TYPE_VAR_8 (BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR_INT_INT_VAR, + BT_VOID, BT_INT, BT_PTR, BT_SIZE, BT_PTR, BT_PTR, + 
BT_PTR, BT_INT, BT_INT) + +DEF_FUNCTION_TYPE_VAR_12 (BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR, + BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_PTR, BT_SIZE, + BT_PTR, BT_PTR, BT_PTR, BT_INT, BT_INT, BT_INT, + BT_INT, BT_INT) + DEF_POINTER_TYPE (BT_PTR_FN_VOID_VAR, BT_FN_VOID_VAR) DEF_FUNCTION_TYPE_3 (BT_FN_PTR_PTR_FN_VOID_VAR_PTR_SIZE, BT_PTR, BT_PTR_FN_VOID_VAR, BT_PTR, BT_SIZE) diff --git a/gcc/builtins.c b/gcc/builtins.c index 737023b3a92..9a6a11b2ce6 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. If not see #include "cgraph.h" #include "tree-chkp.h" #include "rtl-chkp.h" +#include "gomp-constants.h" static tree do_mpc_arg1 (tree, tree, int (*)(mpc_ptr, mpc_srcptr, mpc_rnd_t)); @@ -5903,6 +5904,47 @@ expand_stack_save (void) return ret; } + +/* Expand OpenACC acc_on_device. + + This has to happen late (that is, not in early folding; expand_builtin_*, + rather than fold_builtin_*), as we have to act differently for host and + acceleration device (ACCEL_COMPILER conditional). */ + +static rtx +expand_builtin_acc_on_device (tree exp, rtx target) +{ + if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE)) + return NULL_RTX; + + tree arg = CALL_EXPR_ARG (exp, 0); + + /* Return (arg == v1 || arg == v2) ? 1 : 0. 
*/ + machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg)); + rtx v = expand_normal (arg), v1, v2; +#ifdef ACCEL_COMPILER + v1 = GEN_INT (GOMP_DEVICE_NOT_HOST); + v2 = GEN_INT (ACCEL_COMPILER_acc_device); +#else + v1 = GEN_INT (GOMP_DEVICE_NONE); + v2 = GEN_INT (GOMP_DEVICE_HOST); +#endif + machine_mode target_mode = TYPE_MODE (integer_type_node); + if (!REG_P (target) || GET_MODE (target) != target_mode) + target = gen_reg_rtx (target_mode); + emit_move_insn (target, const1_rtx); + rtx_code_label *done_label = gen_label_rtx (); + do_compare_rtx_and_jump (v, v1, EQ, false, v_mode, NULL_RTX, + NULL_RTX, done_label, PROB_EVEN); + do_compare_rtx_and_jump (v, v2, EQ, false, v_mode, NULL_RTX, + NULL_RTX, done_label, PROB_EVEN); + emit_move_insn (target, const0_rtx); + emit_label (done_label); + + return target; +} + + /* Expand an expression EXP that calls a built-in function, with result going to TARGET if that's convenient (and in mode MODE if that's convenient). @@ -7041,6 +7083,12 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, error ("Your target platform does not support -fcheck-pointer-bounds"); break; + case BUILT_IN_ACC_ON_DEVICE: + target = expand_builtin_acc_on_device (exp, target); + if (target) + return target; + break; + default: /* just do library call, if unknown builtin */ break; } @@ -12478,6 +12526,7 @@ is_inexpensive_builtin (tree decl) case BUILT_IN_LABS: case BUILT_IN_LLABS: case BUILT_IN_PREFETCH: + case BUILT_IN_ACC_ON_DEVICE: return true; default: diff --git a/gcc/builtins.def b/gcc/builtins.def index 5a7ed10f550..e3153bf8cfd 100644 --- a/gcc/builtins.def +++ b/gcc/builtins.def @@ -146,10 +146,20 @@ along with GCC; see the file COPYING3. If not see DEF_BUILTIN (ENUM, NAME, BUILT_IN_NORMAL, BT_LAST, BT_LAST, false, false, \ false, ATTR_LAST, false, false) -/* Builtin used by the implementation of GNU OpenMP. None of these are - actually implemented in the compiler; they're all in libgomp. 
*/ +/* Builtin used by the implementation of OpenACC and OpenMP. Few of these are + actually implemented in the compiler; most are in libgomp. */ /* These builtins also need to be enabled in offloading compilers invoked from mkoffload; for that purpose, we're checking the -foffload-abi flag here. */ +#undef DEF_GOACC_BUILTIN +#define DEF_GOACC_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ + DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ + false, true, true, ATTRS, false, \ + (flag_openacc \ + || flag_offload_abi != OFFLOAD_ABI_UNSET)) +#undef DEF_GOACC_BUILTIN_COMPILER +#define DEF_GOACC_BUILTIN_COMPILER(ENUM, NAME, TYPE, ATTRS) \ + DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ + flag_openacc, true, true, ATTRS, false, true) #undef DEF_GOMP_BUILTIN #define DEF_GOMP_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ @@ -897,7 +907,7 @@ DEF_GCC_BUILTIN (BUILT_IN_LINE, "LINE", BT_FN_INT, ATTR_NOTHROW_LEAF_LIST) /* Synchronization Primitives. */ #include "sync-builtins.def" -/* OpenMP builtins. */ +/* Offloading and Multi Processing builtins. */ #include "omp-builtins.def" /* Cilk keywords builtins. */ diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 976404579d2..e0ad21506ad 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,41 @@ +2015-01-15 Thomas Schwinge + Bernd Schmidt + James Norris + Cesar Philippidis + Ilmir Usmanov + Jakub Jelinek + + * c.opt (fopenacc): New option. + * c-cppbuiltin.c (c_cpp_builtins): Conditionally define _OPENACC. + * c-common.c (DEF_FUNCTION_TYPE_VAR_8, DEF_FUNCTION_TYPE_VAR_12): + New macros. + * c-common.h (c_finish_oacc_wait): New prototype. + * c-omp.c: Include "omp-low.h" and "gomp-constants.h". + (c_finish_oacc_wait): New function. + * c-pragma.c (oacc_pragmas): New variable. + (c_pp_lookup_pragma, init_pragma): Handle it. 
+ * c-pragma.h (enum pragma_kind): Add PRAGMA_OACC_CACHE, + PRAGMA_OACC_DATA, PRAGMA_OACC_ENTER_DATA, PRAGMA_OACC_EXIT_DATA, + PRAGMA_OACC_KERNELS, PRAGMA_OACC_LOOP, PRAGMA_OACC_PARALLEL, + PRAGMA_OACC_UPDATE, PRAGMA_OACC_WAIT. + (enum pragma_omp_clause): Add PRAGMA_OACC_CLAUSE_ASYNC, + PRAGMA_OACC_CLAUSE_AUTO, PRAGMA_OACC_CLAUSE_COLLAPSE, + PRAGMA_OACC_CLAUSE_COPY, PRAGMA_OACC_CLAUSE_COPYIN, + PRAGMA_OACC_CLAUSE_COPYOUT, PRAGMA_OACC_CLAUSE_CREATE, + PRAGMA_OACC_CLAUSE_DELETE, PRAGMA_OACC_CLAUSE_DEVICE, + PRAGMA_OACC_CLAUSE_DEVICEPTR, PRAGMA_OACC_CLAUSE_FIRSTPRIVATE, + PRAGMA_OACC_CLAUSE_GANG, PRAGMA_OACC_CLAUSE_HOST, + PRAGMA_OACC_CLAUSE_IF, PRAGMA_OACC_CLAUSE_NUM_GANGS, + PRAGMA_OACC_CLAUSE_NUM_WORKERS, PRAGMA_OACC_CLAUSE_PRESENT, + PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY, + PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN, + PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT, + PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE, PRAGMA_OACC_CLAUSE_PRIVATE, + PRAGMA_OACC_CLAUSE_REDUCTION, PRAGMA_OACC_CLAUSE_SELF, + PRAGMA_OACC_CLAUSE_SEQ, PRAGMA_OACC_CLAUSE_VECTOR, + PRAGMA_OACC_CLAUSE_VECTOR_LENGTH, PRAGMA_OACC_CLAUSE_WAIT, + PRAGMA_OACC_CLAUSE_WORKER. 
+ 2015-01-14 Marcos Diaz * c-cppbuiltin.c (c_cpp_builtins): New cpp define __SSP_EXPLICIT__ diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c index 054f5a0e94a..eb132c537fe 100644 --- a/gcc/c-family/c-common.c +++ b/gcc/c-family/c-common.c @@ -5234,6 +5234,11 @@ enum c_builtin_type #define DEF_FUNCTION_TYPE_VAR_4(NAME, RETURN, ARG1, ARG2, ARG3, ARG4) NAME, #define DEF_FUNCTION_TYPE_VAR_5(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \ NAME, +#define DEF_FUNCTION_TYPE_VAR_8(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8) NAME, +#define DEF_FUNCTION_TYPE_VAR_12(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8, ARG9, ARG10, ARG11, \ + ARG12) NAME, #define DEF_POINTER_TYPE(NAME, TYPE) NAME, #include "builtin-types.def" #undef DEF_PRIMITIVE_TYPE @@ -5252,6 +5257,8 @@ enum c_builtin_type #undef DEF_FUNCTION_TYPE_VAR_3 #undef DEF_FUNCTION_TYPE_VAR_4 #undef DEF_FUNCTION_TYPE_VAR_5 +#undef DEF_FUNCTION_TYPE_VAR_8 +#undef DEF_FUNCTION_TYPE_VAR_12 #undef DEF_POINTER_TYPE BT_LAST }; @@ -5344,6 +5351,14 @@ c_define_builtins (tree va_list_ref_type_node, tree va_list_arg_type_node) def_fn_type (ENUM, RETURN, 1, 4, ARG1, ARG2, ARG3, ARG4); #define DEF_FUNCTION_TYPE_VAR_5(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \ def_fn_type (ENUM, RETURN, 1, 5, ARG1, ARG2, ARG3, ARG4, ARG5); +#define DEF_FUNCTION_TYPE_VAR_8(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8) \ + def_fn_type (ENUM, RETURN, 1, 8, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, \ + ARG7, ARG8); +#define DEF_FUNCTION_TYPE_VAR_12(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8, ARG9, ARG10, ARG11, ARG12) \ + def_fn_type (ENUM, RETURN, 1, 12, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, \ + ARG7, ARG8, ARG9, ARG10, ARG11, ARG12); #define DEF_POINTER_TYPE(ENUM, TYPE) \ builtin_types[(int) ENUM] = build_pointer_type (builtin_types[(int) TYPE]); @@ -5365,6 +5380,8 @@ c_define_builtins (tree va_list_ref_type_node, tree va_list_arg_type_node) #undef 
DEF_FUNCTION_TYPE_VAR_3 #undef DEF_FUNCTION_TYPE_VAR_4 #undef DEF_FUNCTION_TYPE_VAR_5 +#undef DEF_FUNCTION_TYPE_VAR_8 +#undef DEF_FUNCTION_TYPE_VAR_12 #undef DEF_POINTER_TYPE builtin_types[(int) BT_LAST] = NULL_TREE; diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h index bc2cdeea299..5b2c5ab9c71 100644 --- a/gcc/c-family/c-common.h +++ b/gcc/c-family/c-common.h @@ -1248,6 +1248,7 @@ extern void c_finish_omp_taskwait (location_t); extern void c_finish_omp_taskyield (location_t); extern tree c_finish_omp_for (location_t, enum tree_code, tree, tree, tree, tree, tree, tree); +extern tree c_finish_oacc_wait (location_t, tree, tree); extern void c_omp_split_clauses (location_t, enum tree_code, omp_clause_mask, tree, tree *); extern tree c_omp_declare_simd_clauses_to_numbers (tree, tree); diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c index 0b00d59fd36..19365920a86 100644 --- a/gcc/c-family/c-cppbuiltin.c +++ b/gcc/c-family/c-cppbuiltin.c @@ -1221,6 +1221,9 @@ c_cpp_builtins (cpp_reader *pfile) else if (flag_stack_protect == 1) cpp_define (pfile, "__SSP__=1"); + if (flag_openacc) + cpp_define (pfile, "_OPENACC=201306"); + if (flag_openmp) cpp_define (pfile, "_OPENMP=201307"); diff --git a/gcc/c-family/c-omp.c b/gcc/c-family/c-omp.c index 0523b4c04d7..87150459f81 100644 --- a/gcc/c-family/c-omp.c +++ b/gcc/c-family/c-omp.c @@ -1,4 +1,4 @@ -/* This file contains routines to construct GNU OpenMP constructs, +/* This file contains routines to construct OpenACC and OpenMP constructs, called from parsing in the C and C++ front ends. Copyright (C) 2005-2015 Free Software Foundation, Inc. @@ -39,8 +39,48 @@ along with GCC; see the file COPYING3. If not see #include "c-pragma.h" #include "gimple-expr.h" #include "langhooks.h" +#include "omp-low.h" +#include "gomp-constants.h" +/* Complete a #pragma oacc wait construct. LOC is the location of + the #pragma. 
*/ + +tree +c_finish_oacc_wait (location_t loc, tree parms, tree clauses) +{ + const int nparms = list_length (parms); + tree stmt, t; + vec *args; + + vec_alloc (args, nparms + 2); + stmt = builtin_decl_explicit (BUILT_IN_GOACC_WAIT); + + if (find_omp_clause (clauses, OMP_CLAUSE_ASYNC)) + t = OMP_CLAUSE_ASYNC_EXPR (clauses); + else + t = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC); + + args->quick_push (t); + args->quick_push (build_int_cst (integer_type_node, nparms)); + + for (t = parms; t; t = TREE_CHAIN (t)) + { + if (TREE_CODE (OMP_CLAUSE_WAIT_EXPR (t)) == INTEGER_CST) + args->quick_push (build_int_cst (integer_type_node, + TREE_INT_CST_LOW (OMP_CLAUSE_WAIT_EXPR (t)))); + else + args->quick_push (OMP_CLAUSE_WAIT_EXPR (t)); + } + + stmt = build_call_expr_loc_vec (loc, stmt, args); + add_stmt (stmt); + + vec_free (args); + + return stmt; +} + /* Complete a #pragma omp master construct. STMT is the structured-block that follows the pragma. LOC is the l*/ @@ -303,7 +343,7 @@ c_finish_omp_flush (location_t loc) } -/* Check and canonicalize #pragma omp for increment expression. +/* Check and canonicalize OMP_FOR increment expression. Helper function for c_finish_omp_for. */ static tree @@ -391,7 +431,7 @@ c_omp_for_incr_canonicalize_ptr (location_t loc, tree decl, tree incr) return incr; } -/* Validate and emit code for the OpenMP directive #pragma omp for. +/* Validate and generate OMP_FOR. DECLV is a vector of iteration variables, for each collapsed loop. INITV, CONDV and INCRV are vectors containing initialization expressions, controlling predicates and increment expressions. 
diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c index 16c70e5cb9e..718a310cf50 100644 --- a/gcc/c-family/c-pragma.c +++ b/gcc/c-family/c-pragma.c @@ -1190,6 +1190,17 @@ typedef struct static vec registered_pp_pragmas; struct omp_pragma_def { const char *name; unsigned int id; }; +static const struct omp_pragma_def oacc_pragmas[] = { + { "cache", PRAGMA_OACC_CACHE }, + { "data", PRAGMA_OACC_DATA }, + { "enter", PRAGMA_OACC_ENTER_DATA }, + { "exit", PRAGMA_OACC_EXIT_DATA }, + { "kernels", PRAGMA_OACC_KERNELS }, + { "loop", PRAGMA_OACC_LOOP }, + { "parallel", PRAGMA_OACC_PARALLEL }, + { "update", PRAGMA_OACC_UPDATE }, + { "wait", PRAGMA_OACC_WAIT } +}; static const struct omp_pragma_def omp_pragmas[] = { { "atomic", PRAGMA_OMP_ATOMIC }, { "barrier", PRAGMA_OMP_BARRIER }, @@ -1222,11 +1233,20 @@ static const struct omp_pragma_def omp_pragmas_simd[] = { void c_pp_lookup_pragma (unsigned int id, const char **space, const char **name) { + const int n_oacc_pragmas = sizeof (oacc_pragmas) / sizeof (*oacc_pragmas); const int n_omp_pragmas = sizeof (omp_pragmas) / sizeof (*omp_pragmas); const int n_omp_pragmas_simd = sizeof (omp_pragmas_simd) / sizeof (*omp_pragmas); int i; + for (i = 0; i < n_oacc_pragmas; ++i) + if (oacc_pragmas[i].id == id) + { + *space = "acc"; + *name = oacc_pragmas[i].name; + return; + } + for (i = 0; i < n_omp_pragmas; ++i) if (omp_pragmas[i].id == id) { @@ -1393,6 +1413,17 @@ c_invoke_pragma_handler (unsigned int id) void init_pragma (void) { + if (flag_openacc) + { + const int n_oacc_pragmas + = sizeof (oacc_pragmas) / sizeof (*oacc_pragmas); + int i; + + for (i = 0; i < n_oacc_pragmas; ++i) + cpp_register_deferred_pragma (parse_in, "acc", oacc_pragmas[i].name, + oacc_pragmas[i].id, true, true); + } + if (flag_openmp) { const int n_omp_pragmas = sizeof (omp_pragmas) / sizeof (*omp_pragmas); diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h index cd9fb2712e1..eff94c11483 100644 --- a/gcc/c-family/c-pragma.h +++ 
b/gcc/c-family/c-pragma.h @@ -27,6 +27,15 @@ along with GCC; see the file COPYING3. If not see typedef enum pragma_kind { PRAGMA_NONE = 0, + PRAGMA_OACC_CACHE, + PRAGMA_OACC_DATA, + PRAGMA_OACC_ENTER_DATA, + PRAGMA_OACC_EXIT_DATA, + PRAGMA_OACC_KERNELS, + PRAGMA_OACC_LOOP, + PRAGMA_OACC_PARALLEL, + PRAGMA_OACC_UPDATE, + PRAGMA_OACC_WAIT, PRAGMA_OMP_ATOMIC, PRAGMA_OMP_BARRIER, PRAGMA_OMP_CANCEL, @@ -65,7 +74,7 @@ typedef enum pragma_kind { } pragma_kind; -/* All clauses defined by OpenMP 2.5, 3.0, 3.1 and 4.0. +/* All clauses defined by OpenACC 2.0, and OpenMP 2.5, 3.0, 3.1, and 4.0. Used internally by both C and C++ parsers. */ typedef enum pragma_omp_clause { PRAGMA_OMP_CLAUSE_NONE = 0, @@ -118,7 +127,38 @@ typedef enum pragma_omp_clause { PRAGMA_CILK_CLAUSE_FIRSTPRIVATE = PRAGMA_OMP_CLAUSE_FIRSTPRIVATE, PRAGMA_CILK_CLAUSE_LASTPRIVATE = PRAGMA_OMP_CLAUSE_LASTPRIVATE, PRAGMA_CILK_CLAUSE_REDUCTION = PRAGMA_OMP_CLAUSE_REDUCTION, - PRAGMA_CILK_CLAUSE_UNIFORM = PRAGMA_OMP_CLAUSE_UNIFORM + PRAGMA_CILK_CLAUSE_UNIFORM = PRAGMA_OMP_CLAUSE_UNIFORM, + + /* Clauses for OpenACC. 
*/ + PRAGMA_OACC_CLAUSE_ASYNC = PRAGMA_CILK_CLAUSE_VECTORLENGTH + 1, + PRAGMA_OACC_CLAUSE_AUTO, + PRAGMA_OACC_CLAUSE_COPY, + PRAGMA_OACC_CLAUSE_COPYOUT, + PRAGMA_OACC_CLAUSE_CREATE, + PRAGMA_OACC_CLAUSE_DELETE, + PRAGMA_OACC_CLAUSE_DEVICEPTR, + PRAGMA_OACC_CLAUSE_GANG, + PRAGMA_OACC_CLAUSE_HOST, + PRAGMA_OACC_CLAUSE_NUM_GANGS, + PRAGMA_OACC_CLAUSE_NUM_WORKERS, + PRAGMA_OACC_CLAUSE_PRESENT, + PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY, + PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN, + PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT, + PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE, + PRAGMA_OACC_CLAUSE_SELF, + PRAGMA_OACC_CLAUSE_SEQ, + PRAGMA_OACC_CLAUSE_VECTOR, + PRAGMA_OACC_CLAUSE_VECTOR_LENGTH, + PRAGMA_OACC_CLAUSE_WAIT, + PRAGMA_OACC_CLAUSE_WORKER, + PRAGMA_OACC_CLAUSE_COLLAPSE = PRAGMA_OMP_CLAUSE_COLLAPSE, + PRAGMA_OACC_CLAUSE_COPYIN = PRAGMA_OMP_CLAUSE_COPYIN, + PRAGMA_OACC_CLAUSE_DEVICE = PRAGMA_OMP_CLAUSE_DEVICE, + PRAGMA_OACC_CLAUSE_FIRSTPRIVATE = PRAGMA_OMP_CLAUSE_FIRSTPRIVATE, + PRAGMA_OACC_CLAUSE_IF = PRAGMA_OMP_CLAUSE_IF, + PRAGMA_OACC_CLAUSE_PRIVATE = PRAGMA_OMP_CLAUSE_PRIVATE, + PRAGMA_OACC_CLAUSE_REDUCTION = PRAGMA_OMP_CLAUSE_REDUCTION } pragma_omp_clause; extern struct cpp_reader* parse_in; diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index e61fc567c9e..62b6c68576d 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -1283,6 +1283,10 @@ fobjc-std=objc1 ObjC ObjC++ Var(flag_objc1_only) Conform to the Objective-C 1.0 language as implemented in GCC 4.0 +fopenacc +C ObjC C++ ObjC++ Var(flag_openacc) +Enable OpenACC + fopenmp C ObjC C++ ObjC++ Var(flag_openmp) Enable OpenMP (implies -frecursive in Fortran) diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index f42f53b9024..46524098eef 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,54 @@ +2015-01-15 Thomas Schwinge + Bernd Schmidt + Cesar Philippidis + James Norris + Jakub Jelinek + Ilmir Usmanov + + * c-parser.c: Include "gomp-constants.h". 
+ (c_parser_omp_clause_map): Use enum gomp_map_kind instead of enum + omp_clause_map_kind. Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. + Use OMP_CLAUSE_SET_MAP_KIND. + (c_parser_pragma): Handle PRAGMA_OACC_ENTER_DATA, + PRAGMA_OACC_EXIT_DATA, PRAGMA_OACC_UPDATE. + (c_parser_omp_construct): Handle PRAGMA_OACC_CACHE, + PRAGMA_OACC_DATA, PRAGMA_OACC_KERNELS, PRAGMA_OACC_LOOP, + PRAGMA_OACC_PARALLEL, PRAGMA_OACC_WAIT. + (c_parser_omp_clause_name): Handle "auto", "async", "copy", + "copyout", "create", "delete", "deviceptr", "gang", "host", + "num_gangs", "num_workers", "present", "present_or_copy", "pcopy", + "present_or_copyin", "pcopyin", "present_or_copyout", "pcopyout", + "present_or_create", "pcreate", "seq", "self", "vector", + "vector_length", "wait", "worker". + (OACC_DATA_CLAUSE_MASK, OACC_KERNELS_CLAUSE_MASK) + (OACC_ENTER_DATA_CLAUSE_MASK, OACC_EXIT_DATA_CLAUSE_MASK) + (OACC_LOOP_CLAUSE_MASK, OACC_PARALLEL_CLAUSE_MASK) + (OACC_UPDATE_CLAUSE_MASK, OACC_WAIT_CLAUSE_MASK): New macros. + (c_parser_omp_variable_list): Handle OMP_CLAUSE__CACHE_. + (c_parser_oacc_wait_list, c_parser_oacc_data_clause) + (c_parser_oacc_data_clause_deviceptr) + (c_parser_omp_clause_num_gangs, c_parser_omp_clause_num_workers) + (c_parser_oacc_clause_async, c_parser_oacc_clause_wait) + (c_parser_omp_clause_vector_length, c_parser_oacc_all_clauses) + (c_parser_oacc_cache, c_parser_oacc_data, c_parser_oacc_kernels) + (c_parser_oacc_enter_exit_data, c_parser_oacc_loop) + (c_parser_oacc_parallel, c_parser_oacc_update) + (c_parser_oacc_wait): New functions. + * c-tree.h (c_finish_oacc_parallel, c_finish_oacc_kernels) + (c_finish_oacc_data): New prototypes. + * c-typeck.c: Include "gomp-constants.h". + (handle_omp_array_sections): Handle GOMP_MAP_FORCE_DEVICEPTR. Use + GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. Use + OMP_CLAUSE_SET_MAP_KIND. + (c_finish_oacc_parallel, c_finish_oacc_kernels) + (c_finish_oacc_data): New functions. 
+ (c_finish_omp_clauses): Handle OMP_CLAUSE__CACHE_, + OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, + OMP_CLAUSE_VECTOR_LENGTH, OMP_CLAUSE_ASYNC, OMP_CLAUSE_WAIT, + OMP_CLAUSE_AUTO, OMP_CLAUSE_SEQ, OMP_CLAUSE_GANG, + OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, and OMP_CLAUSE_MAP's + GOMP_MAP_FORCE_DEVICEPTR. + 2015-01-09 Michael Collison * c-array-notation.c: Include hash-set.h, machmode.h, diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index d3094656208..665ee42cf2d 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see #include "plugin.h" #include "omp-low.h" #include "builtins.h" +#include "gomp-constants.h" /* Initialization routine for this file. */ @@ -1249,10 +1250,15 @@ static vec<tree, va_gc> *c_parser_expr_list (c_parser *, bool, bool, vec<tree, va_gc> **, location_t *, tree *, vec<location_t> *, unsigned int * = NULL); +static void c_parser_oacc_enter_exit_data (c_parser *, bool); +static void c_parser_oacc_update (c_parser *); +static tree c_parser_oacc_loop (location_t, c_parser *, char *); static void c_parser_omp_construct (c_parser *); static void c_parser_omp_threadprivate (c_parser *); static void c_parser_omp_barrier (c_parser *); static void c_parser_omp_flush (c_parser *); +static tree c_parser_omp_for_loop (location_t, c_parser *, enum tree_code, + tree, tree *); static void c_parser_omp_taskwait (c_parser *); static void c_parser_omp_taskyield (c_parser *); static void c_parser_omp_cancel (c_parser *); @@ -4492,6 +4498,14 @@ c_parser_initval (c_parser *parser, struct c_expr *after, Although they are erroneous if the labels declared aren't defined, is it useful for the syntax to be this way?
+ OpenACC: + + block-item: + openacc-directive + + openacc-directive: + update-directive + OpenMP: block-item: @@ -4838,6 +4852,29 @@ c_parser_label (c_parser *parser) @throw expression ; @throw ; + OpenACC: + + statement: + openacc-construct + + openacc-construct: + parallel-construct + kernels-construct + data-construct + loop-construct + + parallel-construct: + parallel-directive structured-block + + kernels-construct: + kernels-directive structured-block + + data-construct: + data-directive structured-block + + loop-construct: + loop-directive structured-block + OpenMP: statement: @@ -9570,6 +9607,25 @@ c_parser_pragma (c_parser *parser, enum pragma_context context) switch (id) { + case PRAGMA_OACC_ENTER_DATA: + c_parser_oacc_enter_exit_data (parser, true); + return false; + + case PRAGMA_OACC_EXIT_DATA: + c_parser_oacc_enter_exit_data (parser, false); + return false; + + case PRAGMA_OACC_UPDATE: + if (context != pragma_compound) + { + if (context == pragma_stmt) + c_parser_error (parser, "%<#pragma acc update%> may only be " + "used in compound statements"); + goto bad_stmt; + } + c_parser_oacc_update (parser); + return false; + case PRAGMA_OMP_BARRIER: if (context != pragma_compound) { @@ -9772,7 +9828,7 @@ c_parser_pragma_pch_preprocess (c_parser *parser) c_common_pch_pragma (parse_in, TREE_STRING_POINTER (name)); } -/* OpenMP 2.5 / 3.0 / 3.1 / 4.0 parsing routines. */ +/* OpenACC and OpenMP parsing routines. */ /* Returns name of the next clause. 
If the clause is not recognized PRAGMA_OMP_CLAUSE_NONE is returned and @@ -9784,7 +9840,9 @@ c_parser_omp_clause_name (c_parser *parser) { pragma_omp_clause result = PRAGMA_OMP_CLAUSE_NONE; - if (c_parser_next_token_is_keyword (parser, RID_IF)) + if (c_parser_next_token_is_keyword (parser, RID_AUTO)) + result = PRAGMA_OACC_CLAUSE_AUTO; + else if (c_parser_next_token_is_keyword (parser, RID_IF)) result = PRAGMA_OMP_CLAUSE_IF; else if (c_parser_next_token_is_keyword (parser, RID_DEFAULT)) result = PRAGMA_OMP_CLAUSE_DEFAULT; @@ -9799,20 +9857,32 @@ c_parser_omp_clause_name (c_parser *parser) case 'a': if (!strcmp ("aligned", p)) result = PRAGMA_OMP_CLAUSE_ALIGNED; + else if (!strcmp ("async", p)) + result = PRAGMA_OACC_CLAUSE_ASYNC; break; case 'c': if (!strcmp ("collapse", p)) result = PRAGMA_OMP_CLAUSE_COLLAPSE; + else if (!strcmp ("copy", p)) + result = PRAGMA_OACC_CLAUSE_COPY; else if (!strcmp ("copyin", p)) result = PRAGMA_OMP_CLAUSE_COPYIN; + else if (!strcmp ("copyout", p)) + result = PRAGMA_OACC_CLAUSE_COPYOUT; else if (!strcmp ("copyprivate", p)) result = PRAGMA_OMP_CLAUSE_COPYPRIVATE; + else if (!strcmp ("create", p)) + result = PRAGMA_OACC_CLAUSE_CREATE; break; case 'd': - if (!strcmp ("depend", p)) + if (!strcmp ("delete", p)) + result = PRAGMA_OACC_CLAUSE_DELETE; + else if (!strcmp ("depend", p)) result = PRAGMA_OMP_CLAUSE_DEPEND; else if (!strcmp ("device", p)) result = PRAGMA_OMP_CLAUSE_DEVICE; + else if (!strcmp ("deviceptr", p)) + result = PRAGMA_OACC_CLAUSE_DEVICEPTR; else if (!strcmp ("dist_schedule", p)) result = PRAGMA_OMP_CLAUSE_DIST_SCHEDULE; break; @@ -9824,6 +9894,14 @@ c_parser_omp_clause_name (c_parser *parser) else if (!strcmp ("from", p)) result = PRAGMA_OMP_CLAUSE_FROM; break; + case 'g': + if (!strcmp ("gang", p)) + result = PRAGMA_OACC_CLAUSE_GANG; + break; + case 'h': + if (!strcmp ("host", p)) + result = PRAGMA_OACC_CLAUSE_HOST; + break; case 'i': if (!strcmp ("inbranch", p)) result = PRAGMA_OMP_CLAUSE_INBRANCH; @@ -9847,10 +9925,14 
@@ c_parser_omp_clause_name (c_parser *parser) result = PRAGMA_OMP_CLAUSE_NOTINBRANCH; else if (!strcmp ("nowait", p)) result = PRAGMA_OMP_CLAUSE_NOWAIT; + else if (!strcmp ("num_gangs", p)) + result = PRAGMA_OACC_CLAUSE_NUM_GANGS; else if (!strcmp ("num_teams", p)) result = PRAGMA_OMP_CLAUSE_NUM_TEAMS; else if (!strcmp ("num_threads", p)) result = PRAGMA_OMP_CLAUSE_NUM_THREADS; + else if (!strcmp ("num_workers", p)) + result = PRAGMA_OACC_CLAUSE_NUM_WORKERS; else if (flag_cilkplus && !strcmp ("nomask", p)) result = PRAGMA_CILK_CLAUSE_NOMASK; break; @@ -9861,6 +9943,20 @@ c_parser_omp_clause_name (c_parser *parser) case 'p': if (!strcmp ("parallel", p)) result = PRAGMA_OMP_CLAUSE_PARALLEL; + else if (!strcmp ("present", p)) + result = PRAGMA_OACC_CLAUSE_PRESENT; + else if (!strcmp ("present_or_copy", p) + || !strcmp ("pcopy", p)) + result = PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY; + else if (!strcmp ("present_or_copyin", p) + || !strcmp ("pcopyin", p)) + result = PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN; + else if (!strcmp ("present_or_copyout", p) + || !strcmp ("pcopyout", p)) + result = PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT; + else if (!strcmp ("present_or_create", p) + || !strcmp ("pcreate", p)) + result = PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE; else if (!strcmp ("private", p)) result = PRAGMA_OMP_CLAUSE_PRIVATE; else if (!strcmp ("proc_bind", p)) @@ -9877,10 +9973,14 @@ c_parser_omp_clause_name (c_parser *parser) result = PRAGMA_OMP_CLAUSE_SCHEDULE; else if (!strcmp ("sections", p)) result = PRAGMA_OMP_CLAUSE_SECTIONS; + else if (!strcmp ("seq", p)) + result = PRAGMA_OACC_CLAUSE_SEQ; else if (!strcmp ("shared", p)) result = PRAGMA_OMP_CLAUSE_SHARED; else if (!strcmp ("simdlen", p)) result = PRAGMA_OMP_CLAUSE_SIMDLEN; + else if (!strcmp ("self", p)) + result = PRAGMA_OACC_CLAUSE_SELF; break; case 't': if (!strcmp ("taskgroup", p)) @@ -9897,9 +9997,19 @@ c_parser_omp_clause_name (c_parser *parser) result = PRAGMA_OMP_CLAUSE_UNTIED; break; case 'v': - if (flag_cilkplus && 
!strcmp ("vectorlength", p)) + if (!strcmp ("vector", p)) + result = PRAGMA_OACC_CLAUSE_VECTOR; + else if (!strcmp ("vector_length", p)) + result = PRAGMA_OACC_CLAUSE_VECTOR_LENGTH; + else if (flag_cilkplus && !strcmp ("vectorlength", p)) result = PRAGMA_CILK_CLAUSE_VECTORLENGTH; break; + case 'w': + if (!strcmp ("wait", p)) + result = PRAGMA_OACC_CLAUSE_WAIT; + else if (!strcmp ("worker", p)) + result = PRAGMA_OACC_CLAUSE_WORKER; + break; } } @@ -9926,7 +10036,57 @@ check_no_duplicate_clause (tree clauses, enum omp_clause_code code, } } -/* OpenMP 2.5: +/* OpenACC 2.0 + Parse wait clause or wait directive parameters. */ + +static tree +c_parser_oacc_wait_list (c_parser *parser, location_t clause_loc, tree list) +{ + vec<tree, va_gc> *args; + tree t, args_tree; + + if (!c_parser_require (parser, CPP_OPEN_PAREN, "expected %<(%>")) + return list; + + args = c_parser_expr_list (parser, false, true, NULL, NULL, NULL, NULL); + + if (args->length () == 0) + { + c_parser_error (parser, "expected integer expression before ')'"); + release_tree_vector (args); + return list; + } + + args_tree = build_tree_list_vec (args); + + for (t = args_tree; t; t = TREE_CHAIN (t)) + { + tree targ = TREE_VALUE (t); + + if (targ != error_mark_node) + { + if (!INTEGRAL_TYPE_P (TREE_TYPE (targ))) + { + c_parser_error (parser, "expression must be integral"); + targ = error_mark_node; + } + else + { + tree c = build_omp_clause (clause_loc, OMP_CLAUSE_WAIT); + + OMP_CLAUSE_DECL (c) = targ; + OMP_CLAUSE_CHAIN (c) = list; + list = c; + } + } + } + + release_tree_vector (args); + c_parser_require (parser, CPP_CLOSE_PAREN, "expected %<)%>"); + return list; +} + +/* OpenACC 2.0, OpenMP 2.5: variable-list: identifier variable-list , identifier @@ -9967,6 +10127,14 @@ c_parser_omp_variable_list (c_parser *parser, { switch (kind) { + case OMP_CLAUSE__CACHE_: + if (c_parser_peek_token (parser)->type != CPP_OPEN_SQUARE) + { + c_parser_error (parser, "expected %<[%>"); + t = error_mark_node; + break; + } + /* FALL
THROUGH. */ case OMP_CLAUSE_MAP: case OMP_CLAUSE_FROM: case OMP_CLAUSE_TO: @@ -10005,6 +10173,26 @@ c_parser_omp_variable_list (c_parser *parser, t = error_mark_node; break; } + + if (kind == OMP_CLAUSE__CACHE_) + { + if (TREE_CODE (low_bound) != INTEGER_CST + && !TREE_READONLY (low_bound)) + { + error_at (clause_loc, + "%qD is not a constant", low_bound); + t = error_mark_node; + } + + if (TREE_CODE (length) != INTEGER_CST + && !TREE_READONLY (length)) + { + error_at (clause_loc, + "%qD is not a constant", length); + t = error_mark_node; + } + } + t = tree_cons (low_bound, length, t); } break; @@ -10033,7 +10221,7 @@ c_parser_omp_variable_list (c_parser *parser, } /* Similarly, but expect leading and trailing parenthesis. This is a very - common case for omp clauses. */ + common case for OpenACC and OpenMP clauses. */ static tree c_parser_omp_var_list_parens (c_parser *parser, enum omp_clause_code kind, @@ -10050,7 +10238,119 @@ c_parser_omp_var_list_parens (c_parser *parser, enum omp_clause_code kind, return list; } -/* OpenMP 3.0: +/* OpenACC 2.0: + copy ( variable-list ) + copyin ( variable-list ) + copyout ( variable-list ) + create ( variable-list ) + delete ( variable-list ) + present ( variable-list ) + present_or_copy ( variable-list ) + pcopy ( variable-list ) + present_or_copyin ( variable-list ) + pcopyin ( variable-list ) + present_or_copyout ( variable-list ) + pcopyout ( variable-list ) + present_or_create ( variable-list ) + pcreate ( variable-list ) */ + +static tree +c_parser_oacc_data_clause (c_parser *parser, pragma_omp_clause c_kind, + tree list) +{ + enum gomp_map_kind kind; + switch (c_kind) + { + case PRAGMA_OACC_CLAUSE_COPY: + kind = GOMP_MAP_FORCE_TOFROM; + break; + case PRAGMA_OACC_CLAUSE_COPYIN: + kind = GOMP_MAP_FORCE_TO; + break; + case PRAGMA_OACC_CLAUSE_COPYOUT: + kind = GOMP_MAP_FORCE_FROM; + break; + case PRAGMA_OACC_CLAUSE_CREATE: + kind = GOMP_MAP_FORCE_ALLOC; + break; + case PRAGMA_OACC_CLAUSE_DELETE: + kind = 
GOMP_MAP_FORCE_DEALLOC; + break; + case PRAGMA_OACC_CLAUSE_DEVICE: + kind = GOMP_MAP_FORCE_TO; + break; + case PRAGMA_OACC_CLAUSE_HOST: + case PRAGMA_OACC_CLAUSE_SELF: + kind = GOMP_MAP_FORCE_FROM; + break; + case PRAGMA_OACC_CLAUSE_PRESENT: + kind = GOMP_MAP_FORCE_PRESENT; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY: + kind = GOMP_MAP_TOFROM; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN: + kind = GOMP_MAP_TO; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT: + kind = GOMP_MAP_FROM; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE: + kind = GOMP_MAP_ALLOC; + break; + default: + gcc_unreachable (); + } + tree nl, c; + nl = c_parser_omp_var_list_parens (parser, OMP_CLAUSE_MAP, list); + + for (c = nl; c != list; c = OMP_CLAUSE_CHAIN (c)) + OMP_CLAUSE_SET_MAP_KIND (c, kind); + + return nl; +} + +/* OpenACC 2.0: + deviceptr ( variable-list ) */ + +static tree +c_parser_oacc_data_clause_deviceptr (c_parser *parser, tree list) +{ + location_t loc = c_parser_peek_token (parser)->location; + tree vars, t; + + /* Can't use OMP_CLAUSE_MAP here (that is, can't use the generic + c_parser_oacc_data_clause), as for PRAGMA_OACC_CLAUSE_DEVICEPTR, + variable-list must only allow for pointer variables. */ + vars = c_parser_omp_var_list_parens (parser, OMP_CLAUSE_ERROR, NULL); + for (t = vars; t && t; t = TREE_CHAIN (t)) + { + tree v = TREE_PURPOSE (t); + + /* FIXME diagnostics: Ideally we should keep individual + locations for all the variables in the var list to make the + following errors more precise. Perhaps + c_parser_omp_var_list_parens() should construct a list of + locations to go along with the var list. 
*/ + + if (TREE_CODE (v) != VAR_DECL) + error_at (loc, "%qD is not a variable", v); + else if (TREE_TYPE (v) == error_mark_node) + ; + else if (!POINTER_TYPE_P (TREE_TYPE (v))) + error_at (loc, "%qD is not a pointer variable", v); + + tree u = build_omp_clause (loc, OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (u, GOMP_MAP_FORCE_DEVICEPTR); + OMP_CLAUSE_DECL (u) = v; + OMP_CLAUSE_CHAIN (u) = list; + list = u; + } + + return list; +} + +/* OpenACC 2.0, OpenMP 3.0: collapse ( constant-expression ) */ static tree @@ -10193,7 +10493,7 @@ c_parser_omp_clause_final (c_parser *parser, tree list) return list; } -/* OpenMP 2.5: +/* OpenACC, OpenMP 2.5: if ( expression ) */ static tree @@ -10261,6 +10561,51 @@ c_parser_omp_clause_nowait (c_parser *parser ATTRIBUTE_UNUSED, tree list) return c; } +/* OpenACC: + num_gangs ( expression ) */ + +static tree +c_parser_omp_clause_num_gangs (c_parser *parser, tree list) +{ + location_t num_gangs_loc = c_parser_peek_token (parser)->location; + if (c_parser_require (parser, CPP_OPEN_PAREN, "expected %<(%>")) + { + location_t expr_loc = c_parser_peek_token (parser)->location; + tree c, t = c_parser_expression (parser).value; + mark_exp_read (t); + t = c_fully_fold (t, false, NULL); + + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, "expected %<)%>"); + + if (!INTEGRAL_TYPE_P (TREE_TYPE (t))) + { + c_parser_error (parser, "expected integer expression"); + return list; + } + + /* Attempt to statically determine when the number isn't positive. 
*/ + c = fold_build2_loc (expr_loc, LE_EXPR, boolean_type_node, t, + build_int_cst (TREE_TYPE (t), 0)); + if (CAN_HAVE_LOCATION_P (c)) + SET_EXPR_LOCATION (c, expr_loc); + if (c == boolean_true_node) + { + warning_at (expr_loc, 0, + "%<num_gangs%> value must be positive"); + t = integer_one_node; + } + + check_no_duplicate_clause (list, OMP_CLAUSE_NUM_GANGS, "num_gangs"); + + c = build_omp_clause (num_gangs_loc, OMP_CLAUSE_NUM_GANGS); + OMP_CLAUSE_NUM_GANGS_EXPR (c) = t; + OMP_CLAUSE_CHAIN (c) = list; + list = c; + } + + return list; +} + /* OpenMP 2.5: num_threads ( expression ) */ @@ -10306,6 +10651,100 @@ c_parser_omp_clause_num_threads (c_parser *parser, tree list) return list; } +/* OpenACC: + num_workers ( expression ) */ + +static tree +c_parser_omp_clause_num_workers (c_parser *parser, tree list) +{ + location_t num_workers_loc = c_parser_peek_token (parser)->location; + if (c_parser_require (parser, CPP_OPEN_PAREN, "expected %<(%>")) + { + location_t expr_loc = c_parser_peek_token (parser)->location; + tree c, t = c_parser_expression (parser).value; + mark_exp_read (t); + t = c_fully_fold (t, false, NULL); + + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, "expected %<)%>"); + + if (!INTEGRAL_TYPE_P (TREE_TYPE (t))) + { + c_parser_error (parser, "expected integer expression"); + return list; + } + + /* Attempt to statically determine when the number isn't positive.
*/ + c = fold_build2_loc (expr_loc, LE_EXPR, boolean_type_node, t, + build_int_cst (TREE_TYPE (t), 0)); + if (CAN_HAVE_LOCATION_P (c)) + SET_EXPR_LOCATION (c, expr_loc); + if (c == boolean_true_node) + { + warning_at (expr_loc, 0, + "%<num_workers%> value must be positive"); + t = integer_one_node; + } + + check_no_duplicate_clause (list, OMP_CLAUSE_NUM_WORKERS, "num_workers"); + + c = build_omp_clause (num_workers_loc, OMP_CLAUSE_NUM_WORKERS); + OMP_CLAUSE_NUM_WORKERS_EXPR (c) = t; + OMP_CLAUSE_CHAIN (c) = list; + list = c; + } + + return list; +} + +/* OpenACC: + async [( int-expr )] */ + +static tree +c_parser_oacc_clause_async (c_parser *parser, tree list) +{ + tree c, t; + location_t loc = c_parser_peek_token (parser)->location; + + t = build_int_cst (integer_type_node, GOMP_ASYNC_NOVAL); + + if (c_parser_peek_token (parser)->type == CPP_OPEN_PAREN) + { + c_parser_consume_token (parser); + + t = c_parser_expression (parser).value; + if (!INTEGRAL_TYPE_P (TREE_TYPE (t))) + c_parser_error (parser, "expected integer expression"); + else if (t == error_mark_node + || !c_parser_require (parser, CPP_CLOSE_PAREN, "expected %<)%>")) + return list; + } + else + t = c_fully_fold (t, false, NULL); + + check_no_duplicate_clause (list, OMP_CLAUSE_ASYNC, "async"); + + c = build_omp_clause (loc, OMP_CLAUSE_ASYNC); + OMP_CLAUSE_ASYNC_EXPR (c) = t; + OMP_CLAUSE_CHAIN (c) = list; + list = c; + + return list; +} + +/* OpenACC: + wait ( int-expr-list ) */ + +static tree +c_parser_oacc_clause_wait (c_parser *parser, tree list) +{ + location_t clause_loc = c_parser_peek_token (parser)->location; + + if (c_parser_peek_token (parser)->type == CPP_OPEN_PAREN) + list = c_parser_oacc_wait_list (parser, clause_loc, list); + + return list; +} + /* OpenMP 2.5: ordered */ @@ -10557,6 +10996,51 @@ c_parser_omp_clause_untied (c_parser *parser ATTRIBUTE_UNUSED, tree list) return c; } +/* OpenACC: + vector_length ( expression ) */ + +static tree +c_parser_omp_clause_vector_length (c_parser *parser, tree
list) +{ + location_t vector_length_loc = c_parser_peek_token (parser)->location; + if (c_parser_require (parser, CPP_OPEN_PAREN, "expected %<(%>")) + { + location_t expr_loc = c_parser_peek_token (parser)->location; + tree c, t = c_parser_expression (parser).value; + mark_exp_read (t); + t = c_fully_fold (t, false, NULL); + + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, "expected %<)%>"); + + if (!INTEGRAL_TYPE_P (TREE_TYPE (t))) + { + c_parser_error (parser, "expected integer expression"); + return list; + } + + /* Attempt to statically determine when the number isn't positive. */ + c = fold_build2_loc (expr_loc, LE_EXPR, boolean_type_node, t, + build_int_cst (TREE_TYPE (t), 0)); + if (CAN_HAVE_LOCATION_P (c)) + SET_EXPR_LOCATION (c, expr_loc); + if (c == boolean_true_node) + { + warning_at (expr_loc, 0, + "%<vector_length%> value must be positive"); + t = integer_one_node; + } + + check_no_duplicate_clause (list, OMP_CLAUSE_VECTOR_LENGTH, "vector_length"); + + c = build_omp_clause (vector_length_loc, OMP_CLAUSE_VECTOR_LENGTH); + OMP_CLAUSE_VECTOR_LENGTH_EXPR (c) = t; + OMP_CLAUSE_CHAIN (c) = list; + list = c; + } + + return list; +} + /* OpenMP 4.0: inbranch notinbranch */ @@ -10898,7 +11382,7 @@ static tree c_parser_omp_clause_map (c_parser *parser, tree list) { location_t clause_loc = c_parser_peek_token (parser)->location; - enum omp_clause_map_kind kind = OMP_CLAUSE_MAP_TOFROM; + enum gomp_map_kind kind = GOMP_MAP_TOFROM; tree nl, c; if (!c_parser_require (parser, CPP_OPEN_PAREN, "expected %<(%>")) @@ -10909,13 +11393,13 @@ c_parser_omp_clause_map (c_parser *parser, tree list) { const char *p = IDENTIFIER_POINTER (c_parser_peek_token (parser)->value); if (strcmp ("alloc", p) == 0) - kind = OMP_CLAUSE_MAP_ALLOC; + kind = GOMP_MAP_ALLOC; else if (strcmp ("to", p) == 0) - kind = OMP_CLAUSE_MAP_TO; + kind = GOMP_MAP_TO; else if (strcmp ("from", p) == 0) - kind = OMP_CLAUSE_MAP_FROM; + kind = GOMP_MAP_FROM; else if (strcmp ("tofrom", p) == 0) - kind =
OMP_CLAUSE_MAP_TOFROM; + kind = GOMP_MAP_TOFROM; else { c_parser_error (parser, "invalid map kind"); @@ -10930,7 +11414,7 @@ c_parser_omp_clause_map (c_parser *parser, tree list) nl = c_parser_omp_variable_list (parser, clause_loc, OMP_CLAUSE_MAP, list); for (c = nl; c != list; c = OMP_CLAUSE_CHAIN (c)) - OMP_CLAUSE_MAP_KIND (c) = kind; + OMP_CLAUSE_SET_MAP_KIND (c, kind); c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, "expected %<)%>"); return nl; @@ -11093,9 +11577,154 @@ c_parser_omp_clause_uniform (c_parser *parser, tree list) return list; } +/* Parse all OpenACC clauses. The set clauses allowed by the directive + is a bitmask in MASK. Return the list of clauses found. */ + +static tree +c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, + const char *where, bool finish_p = true) +{ + tree clauses = NULL; + bool first = true; + + while (c_parser_next_token_is_not (parser, CPP_PRAGMA_EOL)) + { + location_t here; + pragma_omp_clause c_kind; + const char *c_name; + tree prev = clauses; + + if (!first && c_parser_next_token_is (parser, CPP_COMMA)) + c_parser_consume_token (parser); + + here = c_parser_peek_token (parser)->location; + c_kind = c_parser_omp_clause_name (parser); + + switch (c_kind) + { + case PRAGMA_OACC_CLAUSE_ASYNC: + clauses = c_parser_oacc_clause_async (parser, clauses); + c_name = "async"; + break; + case PRAGMA_OACC_CLAUSE_COLLAPSE: + clauses = c_parser_omp_clause_collapse (parser, clauses); + c_name = "collapse"; + break; + case PRAGMA_OACC_CLAUSE_COPY: + clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "copy"; + break; + case PRAGMA_OACC_CLAUSE_COPYIN: + clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "copyin"; + break; + case PRAGMA_OACC_CLAUSE_COPYOUT: + clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "copyout"; + break; + case PRAGMA_OACC_CLAUSE_CREATE: + clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "create"; + 
break; + case PRAGMA_OACC_CLAUSE_DELETE: + clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "delete"; + break; + case PRAGMA_OACC_CLAUSE_DEVICE: + clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "device"; + break; + case PRAGMA_OACC_CLAUSE_DEVICEPTR: + clauses = c_parser_oacc_data_clause_deviceptr (parser, clauses); + c_name = "deviceptr"; + break; + case PRAGMA_OACC_CLAUSE_FIRSTPRIVATE: + clauses = c_parser_omp_clause_firstprivate (parser, clauses); + c_name = "firstprivate"; + break; + case PRAGMA_OACC_CLAUSE_HOST: + clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "host"; + break; + case PRAGMA_OACC_CLAUSE_IF: + clauses = c_parser_omp_clause_if (parser, clauses); + c_name = "if"; + break; + case PRAGMA_OACC_CLAUSE_NUM_GANGS: + clauses = c_parser_omp_clause_num_gangs (parser, clauses); + c_name = "num_gangs"; + break; + case PRAGMA_OACC_CLAUSE_NUM_WORKERS: + clauses = c_parser_omp_clause_num_workers (parser, clauses); + c_name = "num_workers"; + break; + case PRAGMA_OACC_CLAUSE_PRESENT: + clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "present"; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY: + clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "present_or_copy"; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN: + clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "present_or_copyin"; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT: + clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "present_or_copyout"; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE: + clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "present_or_create"; + break; + case PRAGMA_OACC_CLAUSE_PRIVATE: + clauses = c_parser_omp_clause_private (parser, clauses); + c_name = "private"; + break; + case PRAGMA_OACC_CLAUSE_REDUCTION: + clauses = c_parser_omp_clause_reduction (parser, 
clauses); + c_name = "reduction"; + break; + case PRAGMA_OACC_CLAUSE_SELF: + clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "self"; + break; + case PRAGMA_OACC_CLAUSE_VECTOR_LENGTH: + clauses = c_parser_omp_clause_vector_length (parser, clauses); + c_name = "vector_length"; + break; + case PRAGMA_OACC_CLAUSE_WAIT: + clauses = c_parser_oacc_clause_wait (parser, clauses); + c_name = "wait"; + break; + default: + c_parser_error (parser, "expected %<#pragma acc%> clause"); + goto saw_error; + } + + first = false; + + if (((mask >> c_kind) & 1) == 0 && !parser->error) + { + /* Remove the invalid clause(s) from the list to avoid + confusing the rest of the compiler. */ + clauses = prev; + error_at (here, "%qs is not valid for %qs", c_name, where); + } + } + + saw_error: + c_parser_skip_to_pragma_eol (parser); + + if (finish_p) + return c_finish_omp_clauses (clauses); + + return clauses; +} + /* Parse all OpenMP clauses. The set clauses allowed by the directive - is a bitmask in MASK. Return the list of clauses found; the result - of clause default goes in *pdefault. */ + is a bitmask in MASK. Return the list of clauses found. */ static tree c_parser_omp_all_clauses (c_parser *parser, omp_clause_mask mask, @@ -11323,7 +11952,7 @@ c_parser_omp_all_clauses (c_parser *parser, omp_clause_mask mask, return clauses; } -/* OpenMP 2.5: +/* OpenACC 2.0, OpenMP 2.5: structured-block: statement @@ -11339,6 +11968,351 @@ c_parser_omp_structured_block (c_parser *parser) return pop_stmt_list (stmt); } +/* OpenACC 2.0: + # pragma acc cache (variable-list) new-line + + LOC is the location of the #pragma token. 
+*/ + +static tree +c_parser_oacc_cache (location_t loc, c_parser *parser) +{ + tree stmt, clauses; + + clauses = c_parser_omp_var_list_parens (parser, OMP_CLAUSE__CACHE_, NULL); + clauses = c_finish_omp_clauses (clauses); + + c_parser_skip_to_pragma_eol (parser); + + stmt = make_node (OACC_CACHE); + TREE_TYPE (stmt) = void_type_node; + OACC_CACHE_CLAUSES (stmt) = clauses; + SET_EXPR_LOCATION (stmt, loc); + add_stmt (stmt); + + return stmt; +} + +/* OpenACC 2.0: + # pragma acc data oacc-data-clause[optseq] new-line + structured-block + + LOC is the location of the #pragma token. +*/ + +#define OACC_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPY) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICEPTR) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE) ) + +static tree +c_parser_oacc_data (location_t loc, c_parser *parser) +{ + tree stmt, clauses, block; + + clauses = c_parser_oacc_all_clauses (parser, OACC_DATA_CLAUSE_MASK, + "#pragma acc data"); + + block = c_begin_omp_parallel (); + add_stmt (c_parser_omp_structured_block (parser)); + + stmt = c_finish_oacc_data (loc, clauses, block); + + return stmt; +} + +/* OpenACC 2.0: + # pragma acc kernels oacc-kernels-clause[optseq] new-line + structured-block + + LOC is the location of the #pragma token. 
+*/ + +#define OACC_KERNELS_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPY) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICEPTR) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) ) + +static tree +c_parser_oacc_kernels (location_t loc, c_parser *parser, char *p_name) +{ + tree stmt, clauses = NULL_TREE, block; + + strcat (p_name, " kernels"); + + if (c_parser_next_token_is (parser, CPP_NAME)) + { + const char *p = IDENTIFIER_POINTER (c_parser_peek_token (parser)->value); + if (strcmp (p, "loop") == 0) + { + c_parser_consume_token (parser); + block = c_begin_omp_parallel (); + c_parser_oacc_loop (loc, parser, p_name); + stmt = c_finish_oacc_kernels (loc, clauses, block); + OACC_KERNELS_COMBINED (stmt) = 1; + return stmt; + } + } + + clauses = c_parser_oacc_all_clauses (parser, OACC_KERNELS_CLAUSE_MASK, + p_name); + + block = c_begin_omp_parallel (); + add_stmt (c_parser_omp_structured_block (parser)); + + stmt = c_finish_oacc_kernels (loc, clauses, block); + + return stmt; +} + +/* OpenACC 2.0: + # pragma acc enter data oacc-enter-data-clause[optseq] new-line + + or + + # pragma acc exit data oacc-exit-data-clause[optseq] new-line + + + LOC is the location of the #pragma token. 
+*/ + +#define OACC_ENTER_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) ) + +#define OACC_EXIT_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DELETE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) ) + +static void +c_parser_oacc_enter_exit_data (c_parser *parser, bool enter) +{ + location_t loc = c_parser_peek_token (parser)->location; + tree clauses, stmt; + + c_parser_consume_pragma (parser); + + if (!c_parser_next_token_is (parser, CPP_NAME)) + { + c_parser_error (parser, enter + ? "expected %<data%> in %<#pragma acc enter data%>" + : "expected %<data%> in %<#pragma acc exit data%>"); + c_parser_skip_to_pragma_eol (parser); + return; + } + + const char *p = IDENTIFIER_POINTER (c_parser_peek_token (parser)->value); + if (strcmp (p, "data") != 0) + { + c_parser_error (parser, "invalid pragma"); + c_parser_skip_to_pragma_eol (parser); + return; + } + + c_parser_consume_token (parser); + + if (enter) + clauses = c_parser_oacc_all_clauses (parser, OACC_ENTER_DATA_CLAUSE_MASK, + "#pragma acc enter data"); + else + clauses = c_parser_oacc_all_clauses (parser, OACC_EXIT_DATA_CLAUSE_MASK, + "#pragma acc exit data"); + + if (find_omp_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE) + { + error_at (loc, enter + ? "%<#pragma acc enter data%> has no data movement clause" + : "%<#pragma acc exit data%> has no data movement clause"); + return; + } + + stmt = enter ?
make_node (OACC_ENTER_DATA) : make_node (OACC_EXIT_DATA);; + TREE_TYPE (stmt) = void_type_node; + if (enter) + OACC_ENTER_DATA_CLAUSES (stmt) = clauses; + else + OACC_EXIT_DATA_CLAUSES (stmt) = clauses; + SET_EXPR_LOCATION (stmt, loc); + add_stmt (stmt); +} + + +/* OpenACC 2.0: + + # pragma acc loop oacc-loop-clause[optseq] new-line + structured-block + + LOC is the location of the #pragma token. +*/ + +#define OACC_LOOP_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COLLAPSE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_REDUCTION) ) + +static tree +c_parser_oacc_loop (location_t loc, c_parser *parser, char *p_name) +{ + tree stmt, clauses, block; + + strcat (p_name, " loop"); + + clauses = c_parser_oacc_all_clauses (parser, OACC_LOOP_CLAUSE_MASK, p_name); + + block = c_begin_compound_stmt (true); + stmt = c_parser_omp_for_loop (loc, parser, OACC_LOOP, clauses, NULL); + block = c_end_compound_stmt (loc, block, true); + add_stmt (block); + + return stmt; +} + +/* OpenACC 2.0: + # pragma acc parallel oacc-parallel-clause[optseq] new-line + structured-block + + LOC is the location of the #pragma token. 
+*/ + +#define OACC_PARALLEL_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPY) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICEPTR) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_GANGS) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_WORKERS) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_REDUCTION) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_VECTOR_LENGTH) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) ) + +static tree +c_parser_oacc_parallel (location_t loc, c_parser *parser, char *p_name) +{ + tree stmt, clauses = NULL_TREE, block; + + strcat (p_name, " parallel"); + + if (c_parser_next_token_is (parser, CPP_NAME)) + { + const char *p = IDENTIFIER_POINTER (c_parser_peek_token (parser)->value); + if (strcmp (p, "loop") == 0) + { + c_parser_consume_token (parser); + block = c_begin_omp_parallel (); + c_parser_oacc_loop (loc, parser, p_name); + stmt = c_finish_oacc_parallel (loc, clauses, block); + OACC_PARALLEL_COMBINED (stmt) = 1; + return stmt; + } + } + + clauses = c_parser_oacc_all_clauses (parser, OACC_PARALLEL_CLAUSE_MASK, + p_name); + + block = c_begin_omp_parallel (); + add_stmt (c_parser_omp_structured_block (parser)); + + stmt = c_finish_oacc_parallel (loc, clauses, block); + + return stmt; +} + +/* OpenACC 2.0: + # pragma acc update oacc-update-clause[optseq] new-line +*/ + +#define OACC_UPDATE_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << 
PRAGMA_OACC_CLAUSE_ASYNC) \
+ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICE) \
+ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_HOST) \
+ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \
+ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_SELF) \
+ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) )
+
+static void
+c_parser_oacc_update (c_parser *parser)
+{
+  location_t loc = c_parser_peek_token (parser)->location;
+
+  c_parser_consume_pragma (parser);
+  /* Reject the directive unless its clauses include at least one map. */
+  tree clauses = c_parser_oacc_all_clauses (parser, OACC_UPDATE_CLAUSE_MASK,
+                                            "#pragma acc update");
+  if (find_omp_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE)
+    {
+      error_at (loc,
+                "%<#pragma acc update%> must contain at least one "
+                "%<device%> or %<host%> clause");
+      return;
+    }
+
+  if (parser->error)
+    return;
+
+  tree stmt = make_node (OACC_UPDATE);
+  TREE_TYPE (stmt) = void_type_node;
+  OACC_UPDATE_CLAUSES (stmt) = clauses;
+  SET_EXPR_LOCATION (stmt, loc);
+  add_stmt (stmt);
+}
+
+/* OpenACC 2.0:
+   # pragma acc wait [(intseq)] oacc-wait-clause[optseq] new-line
+
+   LOC is the location of the #pragma token. P_NAME is a caller-provided
+   buffer, overwritten here with the name handed to the clause parser. */
+
+#define OACC_WAIT_CLAUSE_MASK \
+ ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) )
+
+static tree
+c_parser_oacc_wait (location_t loc, c_parser *parser, char *p_name)
+{
+  tree clauses, list = NULL_TREE, stmt = NULL_TREE;
+
+  if (c_parser_peek_token (parser)->type == CPP_OPEN_PAREN)
+    list = c_parser_oacc_wait_list (parser, loc, list);
+
+  strcpy (p_name, "#pragma acc wait");
+  clauses = c_parser_oacc_all_clauses (parser, OACC_WAIT_CLAUSE_MASK, p_name);
+  stmt = c_finish_oacc_wait (loc, list, clauses);
+
+  return stmt;
+}
+
 /* OpenMP 2.5:
    # pragma omp atomic new-line
      expression-stmt
@@ -11815,10 +12789,11 @@ c_parser_omp_flush (c_parser *parser)
   c_finish_omp_flush (loc);
 }
 
-/* Parse the restricted form of the for statement allowed by OpenMP.
+/* Parse the restricted form of loop statements allowed by OpenACC and OpenMP.
The real trick here is to determine the loop control variable early so that we can push a new decl if necessary to make it private. - LOC is the location of the OMP in "#pragma omp". */ + LOC is the location of the "acc" or "omp" in "#pragma acc" or "#pragma omp", + respectively. */ static tree c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code, @@ -13660,6 +14635,29 @@ c_parser_omp_construct (c_parser *parser) switch (p_kind) { + case PRAGMA_OACC_CACHE: + strcpy (p_name, "#pragma acc"); + stmt = c_parser_oacc_cache (loc, parser); + break; + case PRAGMA_OACC_DATA: + stmt = c_parser_oacc_data (loc, parser); + break; + case PRAGMA_OACC_KERNELS: + strcpy (p_name, "#pragma acc"); + stmt = c_parser_oacc_kernels (loc, parser, p_name); + break; + case PRAGMA_OACC_LOOP: + strcpy (p_name, "#pragma acc"); + stmt = c_parser_oacc_loop (loc, parser, p_name); + break; + case PRAGMA_OACC_PARALLEL: + strcpy (p_name, "#pragma acc"); + stmt = c_parser_oacc_parallel (loc, parser, p_name); + break; + case PRAGMA_OACC_WAIT: + strcpy (p_name, "#pragma wait"); + stmt = c_parser_oacc_wait (loc, parser, p_name); + break; case PRAGMA_OMP_ATOMIC: c_parser_omp_atomic (loc, parser); return; diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h index 01becd7a743..c879bc77a25 100644 --- a/gcc/c/c-tree.h +++ b/gcc/c/c-tree.h @@ -640,6 +640,9 @@ extern tree c_finish_bc_stmt (location_t, tree *, bool); extern tree c_finish_goto_label (location_t, tree); extern tree c_finish_goto_ptr (location_t, tree); extern tree c_expr_to_decl (tree, bool *, bool *); +extern tree c_finish_oacc_parallel (location_t, tree, tree); +extern tree c_finish_oacc_kernels (location_t, tree, tree); +extern tree c_finish_oacc_data (location_t, tree, tree); extern tree c_begin_omp_parallel (void); extern tree c_finish_omp_parallel (location_t, tree, tree); extern tree c_begin_omp_task (void); diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index a851c8d271d..f39dfdd9aae 100644 --- a/gcc/c/c-typeck.c +++ 
b/gcc/c/c-typeck.c @@ -68,6 +68,7 @@ along with GCC; see the file COPYING3. If not see #include "c-family/c-ubsan.h" #include "cilk.h" #include "wide-int.h" +#include "gomp-constants.h" /* Possible cases of implicit bad conversions. Used to select diagnostic messages in convert_for_assignment. */ @@ -11352,6 +11353,63 @@ c_expr_to_decl (tree expr, bool *tc ATTRIBUTE_UNUSED, bool *se) return expr; } +/* Generate OACC_PARALLEL, with CLAUSES and BLOCK as its compound + statement. LOC is the location of the OACC_PARALLEL. */ + +tree +c_finish_oacc_parallel (location_t loc, tree clauses, tree block) +{ + tree stmt; + + block = c_end_compound_stmt (loc, block, true); + + stmt = make_node (OACC_PARALLEL); + TREE_TYPE (stmt) = void_type_node; + OACC_PARALLEL_CLAUSES (stmt) = clauses; + OACC_PARALLEL_BODY (stmt) = block; + SET_EXPR_LOCATION (stmt, loc); + + return add_stmt (stmt); +} + +/* Generate OACC_KERNELS, with CLAUSES and BLOCK as its compound + statement. LOC is the location of the OACC_KERNELS. */ + +tree +c_finish_oacc_kernels (location_t loc, tree clauses, tree block) +{ + tree stmt; + + block = c_end_compound_stmt (loc, block, true); + + stmt = make_node (OACC_KERNELS); + TREE_TYPE (stmt) = void_type_node; + OACC_KERNELS_CLAUSES (stmt) = clauses; + OACC_KERNELS_BODY (stmt) = block; + SET_EXPR_LOCATION (stmt, loc); + + return add_stmt (stmt); +} + +/* Generate OACC_DATA, with CLAUSES and BLOCK as its compound + statement. LOC is the location of the OACC_DATA. */ + +tree +c_finish_oacc_data (location_t loc, tree clauses, tree block) +{ + tree stmt; + + block = c_end_compound_stmt (loc, block, true); + + stmt = make_node (OACC_DATA); + TREE_TYPE (stmt) = void_type_node; + OACC_DATA_CLAUSES (stmt) = clauses; + OACC_DATA_BODY (stmt) = block; + SET_EXPR_LOCATION (stmt, loc); + + return add_stmt (stmt); +} + /* Like c_begin_compound_stmt, except force the retention of the BLOCK. 
*/ tree @@ -11883,8 +11941,9 @@ handle_omp_array_sections (tree c) OMP_CLAUSE_SIZE (c) = size; if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) return false; + gcc_assert (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FORCE_DEVICEPTR); tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (c2) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_POINTER); if (!c_mark_addressable (t)) return false; OMP_CLAUSE_DECL (c2) = t; @@ -11946,7 +12005,7 @@ c_find_omp_placeholder_r (tree *tp, int *, void *data) return NULL_TREE; } -/* For all elements of CLAUSES, validate them vs OpenMP constraints. +/* For all elements of CLAUSES, validate them against their constraints. Remove any elements from the list that are invalid. */ tree @@ -12268,6 +12327,7 @@ c_finish_omp_clauses (tree clauses) case OMP_CLAUSE_MAP: case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: + case OMP_CLAUSE__CACHE_: t = OMP_CLAUSE_DECL (c); if (TREE_CODE (t) == TREE_LIST) { @@ -12306,7 +12366,9 @@ c_finish_omp_clauses (tree clauses) else if (!c_mark_addressable (t)) remove = true; else if (!(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER) + && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER + || (OMP_CLAUSE_MAP_KIND (c) + == GOMP_MAP_FORCE_DEVICEPTR))) && !lang_hooks.types.omp_mappable_type (TREE_TYPE (t))) { error_at (OMP_CLAUSE_LOCATION (c), @@ -12375,6 +12437,16 @@ c_finish_omp_clauses (tree clauses) case OMP_CLAUSE_TASKGROUP: case OMP_CLAUSE_PROC_BIND: case OMP_CLAUSE__CILK_FOR_COUNT_: + case OMP_CLAUSE_NUM_GANGS: + case OMP_CLAUSE_NUM_WORKERS: + case OMP_CLAUSE_VECTOR_LENGTH: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: + case OMP_CLAUSE_AUTO: + case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_GANG: + case OMP_CLAUSE_WORKER: + case OMP_CLAUSE_VECTOR: pc = &OMP_CLAUSE_CHAIN (c); continue; diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 1a06cadeb53..b28966e5185 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -512,7 +512,7 @@ 
cgraph_node::create (tree decl) node->decl = decl; - if (flag_openmp + if ((flag_openacc || flag_openmp) && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))) { node->offloadable = 1; diff --git a/gcc/config.gcc b/gcc/config.gcc index 0dfc08fc0f1..bf67bebfe86 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -2233,6 +2233,7 @@ nios2-*-*) nvptx-*) tm_file="${tm_file} newlib-stdint.h" tmake_file="nvptx/t-nvptx" + tm_file="${tm_file} nvptx/offload.h" ;; pdp11-*-*) tm_file="${tm_file} newlib-stdint.h" @@ -2965,6 +2966,7 @@ esac case ${target} in *-intelmic-* | *-intelmicemul-*) tmake_file="${tmake_file} i386/t-intelmic" + tm_file="${tm_file} i386/intelmic-offload.h" ;; esac diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h index b0dd66f39b6..3be5c93af26 100644 --- a/gcc/config/arc/arc.h +++ b/gcc/config/arc/arc.h @@ -173,7 +173,7 @@ along with GCC; see the file COPYING3. If not see %(linker) %l " LINK_PIE_SPEC "%X %{o*} %{A} %{d} %{e*} %{m} %{N} %{n} %{r}\ %{s} %{t} %{u*} %{x} %{z} %{Z} %{!A:%{!nostdlib:%{!nostartfiles:%S}}}\ %{static:} %{L*} %(mfwrap) %(link_libgcc) %o\ - %{fopenmp|ftree-parallelize-loops=*:%:include(libgomp.spec)%(link_gomp)}\ + %{fopenacc|fopenmp|ftree-parallelize-loops=*:%:include(libgomp.spec)%(link_gomp)}\ %(mflib)\ %{fprofile-arcs|fprofile-generate|coverage:-lgcov}\ %{!nostdlib:%{!nodefaultlibs:%(link_ssp) %(link_gcc_c_sequence)}}\ diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h index 4bae095a3f2..b61dbb5eb94 100644 --- a/gcc/config/darwin.h +++ b/gcc/config/darwin.h @@ -177,7 +177,7 @@ extern GTY(()) int darwin_ms_struct; %{o*}%{!o:-o a.out} \ %{!nostdlib:%{!nostartfiles:%S}} \ %{L*} %(link_libgcc) %o %{fprofile-arcs|fprofile-generate*|coverage:-lgcov} \ - %{fopenmp|ftree-parallelize-loops=*: \ + %{fopenacc|fopenmp|ftree-parallelize-loops=*: \ %{static|static-libgcc|static-libstdc++|static-libgfortran: libgomp.a%s; : -lgomp } } \ %{fgnu-tm: \ %{static|static-libgcc|static-libstdc++|static-libgfortran: libitm.a%s; : 
-litm } } \ diff --git a/gcc/config/i386/intelmic-mkoffload.c b/gcc/config/i386/intelmic-mkoffload.c index 050f2e62d5c..edc3f92ea01 100644 --- a/gcc/config/i386/intelmic-mkoffload.c +++ b/gcc/config/i386/intelmic-mkoffload.c @@ -22,13 +22,13 @@ #include "config.h" #include +#include "libgomp-plugin.h" #include "system.h" #include "coretypes.h" #include "obstack.h" #include "intl.h" #include "diagnostic.h" #include "collect-utils.h" -#include const char tool_name[] = "intelmic mkoffload"; diff --git a/gcc/config/i386/intelmic-offload.h b/gcc/config/i386/intelmic-offload.h new file mode 100644 index 00000000000..4fb4b65aa91 --- /dev/null +++ b/gcc/config/i386/intelmic-offload.h @@ -0,0 +1,35 @@ +/* Support for Intel MIC offloading. + + Copyright (C) 2014-2015 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef INTELMIC_OFFLOAD_H +#define INTELMIC_OFFLOAD_H + +/* Support for OpenACC acc_on_device. 
*/ + +#include "gomp-constants.h" + +#define ACCEL_COMPILER_acc_device GOMP_DEVICE_INTEL_MIC + +#endif diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h index a7e66cddd80..d7b92e88714 100644 --- a/gcc/config/i386/mingw32.h +++ b/gcc/config/i386/mingw32.h @@ -199,7 +199,7 @@ do { \ /* mingw32 uses the -mthreads option to enable thread support. */ #undef GOMP_SELF_SPECS -#define GOMP_SELF_SPECS "%{fopenmp|ftree-parallelize-loops=*: " \ +#define GOMP_SELF_SPECS "%{fopenacc|fopenmp|ftree-parallelize-loops=*: " \ "-mthreads -pthread}" #undef GTM_SELF_SPECS #define GTM_SELF_SPECS "%{fgnu-tm:-mthreads -pthread}" diff --git a/gcc/config/ia64/hpux.h b/gcc/config/ia64/hpux.h index 0cb2fc255c2..a497e444b21 100644 --- a/gcc/config/ia64/hpux.h +++ b/gcc/config/ia64/hpux.h @@ -92,7 +92,7 @@ do { \ #undef LIB_SPEC #define LIB_SPEC \ "%{!shared: \ - %{mt|pthread:%{fopenmp|ftree-parallelize-loops=*:-lrt} -lpthread} \ + %{mt|pthread:%{fopenacc|fopenmp|ftree-parallelize-loops=*:-lrt} -lpthread} \ %{p:%{!mlp64:-L/usr/lib/hpux32/libp} \ %{mlp64:-L/usr/lib/hpux64/libp} -lprof} \ %{pg:%{!mlp64:-L/usr/lib/hpux32/libp} \ diff --git a/gcc/config/nvptx/offload.h b/gcc/config/nvptx/offload.h new file mode 100644 index 00000000000..02c5e8b961a --- /dev/null +++ b/gcc/config/nvptx/offload.h @@ -0,0 +1,35 @@ +/* Support for Nvidia PTX offloading. + + Copyright (C) 2014-2015 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+   <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_NVPTX_OFFLOAD_H
+#define GCC_NVPTX_OFFLOAD_H
+
+/* Support for OpenACC acc_on_device. */
+
+#include "gomp-constants.h"
+
+#define ACCEL_COMPILER_acc_device GOMP_DEVICE_NVIDIA_PTX
+
+#endif
diff --git a/gcc/config/pa/pa-hpux11.h b/gcc/config/pa/pa-hpux11.h
index 0e0c33eb837..40d49a000fc 100644
--- a/gcc/config/pa/pa-hpux11.h
+++ b/gcc/config/pa/pa-hpux11.h
@@ -122,8 +122,8 @@ along with GCC; see the file COPYING3. If not see
 #undef LIB_SPEC
 #define LIB_SPEC \
   "%{!shared:\
-     %{fopenmp|ftree-parallelize-loops=*:%{static:-a archive_shared} -lrt\
-       %{static:-a archive}}\
+     %{fopenacc|fopenmp|ftree-parallelize-loops=*:\
+       %{static:-a archive_shared} -lrt %{static:-a archive}}\
      %{mt|pthread:-lpthread} -lc\
      %{static:%{!nolibdld:-a archive_shared -ldld -a archive -lc}\
        %{!mt:%{!pthread:-a shared -lc -a archive}}}}\
diff --git a/gcc/config/pa/pa64-hpux.h b/gcc/config/pa/pa64-hpux.h
index 11630b722a6..0af5c1703bc 100644
--- a/gcc/config/pa/pa64-hpux.h
+++ b/gcc/config/pa/pa64-hpux.h
@@ -58,22 +58,22 @@ along with GCC; see the file COPYING3.
If not see #if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_GNU_LD) #define LIB_SPEC \ "%{!shared:\ - %{!p:%{!pg:%{fopenmp|ftree-parallelize-loops=*:%{static:-a shared} -lrt\ - %{static:-a archive}}\ + %{!p:%{!pg:%{fopenacc|fopenmp|ftree-parallelize-loops=*:\ + %{static:-a shared} -lrt %{static:-a archive}}\ %{mt|pthread:-lpthread} -lc\ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\ %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\ %{p:%{!pg:%{static:%{!mhp-ld:-a shared}%{mhp-ld:-a archive_shared}}\ -lprof %{static:-a archive}\ - %{fopenmp|ftree-parallelize-loops=*:%{static:-a shared} -lrt\ - %{static:-a archive}}\ + %{fopenacc|fopenmp|ftree-parallelize-loops=*:\ + %{static:-a shared} -lrt %{static:-a archive}}\ %{mt|pthread:-lpthread} -lc\ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\ %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\ %{pg:%{static:%{!mhp-ld:-a shared}%{mhp-ld:-a archive_shared}}\ -lgprof %{static:-a archive}\ - %{fopenmp|ftree-parallelize-loops=*:%{static:-a shared} -lrt\ - %{static:-a archive}}\ + %{fopenacc|fopenmp|ftree-parallelize-loops=*:\ + %{static:-a shared} -lrt %{static:-a archive}}\ %{mt|pthread:-lpthread} -lc\ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\ %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\ @@ -81,22 +81,22 @@ along with GCC; see the file COPYING3. 
If not see #else #define LIB_SPEC \ "%{!shared:\ - %{!p:%{!pg:%{fopenmp|ftree-parallelize-loops=*:%{static:-a shared} -lrt\ - %{static:-a archive}}\ + %{!p:%{!pg:%{fopenacc|fopenmp|ftree-parallelize-loops=*:\ + %{static:-a shared} -lrt %{static:-a archive}}\ %{mt|pthread:-lpthread} -lc\ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\ %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\ %{p:%{!pg:%{static:%{mgnu-ld:-a shared}%{!mgnu-ld:-a archive_shared}}\ -lprof %{static:-a archive}\ - %{fopenmp|ftree-parallelize-loops=*:%{static:-a shared} -lrt\ - %{static:-a archive}}\ + %{fopenacc|fopenmp|ftree-parallelize-loops=*:\ + %{static:-a shared} -lrt %{static:-a archive}}\ %{mt|pthread:-lpthread} -lc\ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\ %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\ %{pg:%{static:%{mgnu-ld:-a shared}%{!mgnu-ld:-a archive_shared}}\ -lgprof %{static:-a archive}\ - %{fopenmp|ftree-parallelize-loops=*:%{static:-a shared} -lrt\ - %{static:-a archive}}\ + %{fopenacc|fopenmp|ftree-parallelize-loops=*:\ + %{static:-a shared} -lrt %{static:-a archive}}\ %{mt|pthread:-lpthread} -lc\ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\ %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\ diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 02b4fac907b..543f4d9a4d9 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,48 @@ +2015-01-15 Thomas Schwinge + James Norris + Cesar Philippidis + Ilmir Usmanov + Jakub Jelinek + + * parser.c: Include "gomp-constants.h". + (cp_parser_omp_clause_map): Use enum gomp_map_kind instead of enum + omp_clause_map_kind. Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. + Use OMP_CLAUSE_SET_MAP_KIND. + (cp_parser_omp_construct, cp_parser_pragma): Handle + PRAGMA_OACC_CACHE, PRAGMA_OACC_DATA, PRAGMA_OACC_ENTER_DATA, + PRAGMA_OACC_EXIT_DATA, PRAGMA_OACC_KERNELS, PRAGMA_OACC_PARALLEL, + PRAGMA_OACC_LOOP, PRAGMA_OACC_UPDATE, PRAGMA_OACC_WAIT. 
+ (cp_parser_omp_clause_name): Handle "async", "copy", "copyout", + "create", "delete", "deviceptr", "host", "num_gangs", + "num_workers", "present", "present_or_copy", "pcopy", + "present_or_copyin", "pcopyin", "present_or_copyout", "pcopyout", + "present_or_create", "pcreate", "vector_length", "wait". + (OACC_DATA_CLAUSE_MASK, OACC_ENTER_DATA_CLAUSE_MASK) + (OACC_EXIT_DATA_CLAUSE_MASK, OACC_KERNELS_CLAUSE_MASK) + (OACC_LOOP_CLAUSE_MASK, OACC_PARALLEL_CLAUSE_MASK) + (OACC_UPDATE_CLAUSE_MASK, OACC_WAIT_CLAUSE_MASK): New macros. + (cp_parser_omp_var_list_no_open): Handle OMP_CLAUSE__CACHE_. + (cp_parser_oacc_data_clause, cp_parser_oacc_data_clause_deviceptr) + (cp_parser_oacc_clause_vector_length, cp_parser_oacc_wait_list) + (cp_parser_oacc_clause_wait, cp_parser_omp_clause_num_gangs) + (cp_parser_omp_clause_num_workers, cp_parser_oacc_clause_async) + (cp_parser_oacc_all_clauses, cp_parser_oacc_cache) + (cp_parser_oacc_data, cp_parser_oacc_enter_exit_data) + (cp_parser_oacc_kernels, cp_parser_oacc_loop) + (cp_parser_oacc_parallel, cp_parser_oacc_update) + (cp_parser_oacc_wait): New functions. + * cp-tree.h (finish_oacc_data, finish_oacc_kernels) + (finish_oacc_parallel): New prototypes. + * semantics.c: Include "gomp-constants.h". + (handle_omp_array_sections): Handle GOMP_MAP_FORCE_DEVICEPTR. Use + GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. Use + OMP_CLAUSE_SET_MAP_KIND. + (finish_omp_clauses): Handle OMP_CLAUSE_ASYNC, + OMP_CLAUSE_VECTOR_LENGTH, OMP_CLAUSE_WAIT, OMP_CLAUSE__CACHE_. + Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. + (finish_oacc_data, finish_oacc_kernels, finish_oacc_parallel): New + functions. 
+ 2015-01-14 Paolo Carlini PR c++/58671 diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 77f2b5b918c..10c63fd1b4d 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -5990,6 +5990,9 @@ extern tree finish_omp_clauses (tree); extern void finish_omp_threadprivate (tree); extern tree begin_omp_structured_block (void); extern tree finish_omp_structured_block (tree); +extern tree finish_oacc_data (tree, tree); +extern tree finish_oacc_kernels (tree, tree); +extern tree finish_oacc_parallel (tree, tree); extern tree begin_omp_parallel (void); extern tree finish_omp_parallel (tree, tree); extern tree begin_omp_task (void); diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 3290dfa7e59..bfa3d81bf74 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -60,6 +60,7 @@ along with GCC; see the file COPYING3. If not see #include "parser.h" #include "type-utils.h" #include "omp-low.h" +#include "gomp-constants.h" /* The lexer. */ @@ -27542,6 +27543,8 @@ cp_parser_omp_clause_name (cp_parser *parser) result = PRAGMA_OMP_CLAUSE_IF; else if (cp_lexer_next_token_is_keyword (parser->lexer, RID_DEFAULT)) result = PRAGMA_OMP_CLAUSE_DEFAULT; + else if (cp_lexer_next_token_is_keyword (parser->lexer, RID_DELETE)) + result = PRAGMA_OACC_CLAUSE_DELETE; else if (cp_lexer_next_token_is_keyword (parser->lexer, RID_PRIVATE)) result = PRAGMA_OMP_CLAUSE_PRIVATE; else if (cp_lexer_next_token_is_keyword (parser->lexer, RID_FOR)) @@ -27556,20 +27559,30 @@ cp_parser_omp_clause_name (cp_parser *parser) case 'a': if (!strcmp ("aligned", p)) result = PRAGMA_OMP_CLAUSE_ALIGNED; + else if (!strcmp ("async", p)) + result = PRAGMA_OACC_CLAUSE_ASYNC; break; case 'c': if (!strcmp ("collapse", p)) result = PRAGMA_OMP_CLAUSE_COLLAPSE; + else if (!strcmp ("copy", p)) + result = PRAGMA_OACC_CLAUSE_COPY; else if (!strcmp ("copyin", p)) result = PRAGMA_OMP_CLAUSE_COPYIN; + else if (!strcmp ("copyout", p)) + result = PRAGMA_OACC_CLAUSE_COPYOUT; else if (!strcmp ("copyprivate", p)) result = 
PRAGMA_OMP_CLAUSE_COPYPRIVATE; + else if (!strcmp ("create", p)) + result = PRAGMA_OACC_CLAUSE_CREATE; break; case 'd': if (!strcmp ("depend", p)) result = PRAGMA_OMP_CLAUSE_DEPEND; else if (!strcmp ("device", p)) result = PRAGMA_OMP_CLAUSE_DEVICE; + else if (!strcmp ("deviceptr", p)) + result = PRAGMA_OACC_CLAUSE_DEVICEPTR; else if (!strcmp ("dist_schedule", p)) result = PRAGMA_OMP_CLAUSE_DIST_SCHEDULE; break; @@ -27581,6 +27594,10 @@ cp_parser_omp_clause_name (cp_parser *parser) else if (!strcmp ("from", p)) result = PRAGMA_OMP_CLAUSE_FROM; break; + case 'h': + if (!strcmp ("host", p)) + result = PRAGMA_OACC_CLAUSE_HOST; + break; case 'i': if (!strcmp ("inbranch", p)) result = PRAGMA_OMP_CLAUSE_INBRANCH; @@ -27606,10 +27623,14 @@ cp_parser_omp_clause_name (cp_parser *parser) result = PRAGMA_OMP_CLAUSE_NOWAIT; else if (flag_cilkplus && !strcmp ("nomask", p)) result = PRAGMA_CILK_CLAUSE_NOMASK; + else if (!strcmp ("num_gangs", p)) + result = PRAGMA_OACC_CLAUSE_NUM_GANGS; else if (!strcmp ("num_teams", p)) result = PRAGMA_OMP_CLAUSE_NUM_TEAMS; else if (!strcmp ("num_threads", p)) result = PRAGMA_OMP_CLAUSE_NUM_THREADS; + else if (!strcmp ("num_workers", p)) + result = PRAGMA_OACC_CLAUSE_NUM_WORKERS; break; case 'o': if (!strcmp ("ordered", p)) @@ -27618,6 +27639,20 @@ cp_parser_omp_clause_name (cp_parser *parser) case 'p': if (!strcmp ("parallel", p)) result = PRAGMA_OMP_CLAUSE_PARALLEL; + else if (!strcmp ("present", p)) + result = PRAGMA_OACC_CLAUSE_PRESENT; + else if (!strcmp ("present_or_copy", p) + || !strcmp ("pcopy", p)) + result = PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY; + else if (!strcmp ("present_or_copyin", p) + || !strcmp ("pcopyin", p)) + result = PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN; + else if (!strcmp ("present_or_copyout", p) + || !strcmp ("pcopyout", p)) + result = PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT; + else if (!strcmp ("present_or_create", p) + || !strcmp ("pcreate", p)) + result = PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE; else if (!strcmp ("proc_bind", 
p)) result = PRAGMA_OMP_CLAUSE_PROC_BIND; break; @@ -27632,6 +27667,8 @@ cp_parser_omp_clause_name (cp_parser *parser) result = PRAGMA_OMP_CLAUSE_SCHEDULE; else if (!strcmp ("sections", p)) result = PRAGMA_OMP_CLAUSE_SECTIONS; + else if (!strcmp ("self", p)) + result = PRAGMA_OACC_CLAUSE_SELF; else if (!strcmp ("shared", p)) result = PRAGMA_OMP_CLAUSE_SHARED; else if (!strcmp ("simdlen", p)) @@ -27652,9 +27689,15 @@ cp_parser_omp_clause_name (cp_parser *parser) result = PRAGMA_OMP_CLAUSE_UNTIED; break; case 'v': - if (flag_cilkplus && !strcmp ("vectorlength", p)) + if (!strcmp ("vector_length", p)) + result = PRAGMA_OACC_CLAUSE_VECTOR_LENGTH; + else if (flag_cilkplus && !strcmp ("vectorlength", p)) result = PRAGMA_CILK_CLAUSE_VECTORLENGTH; break; + case 'w': + if (!strcmp ("wait", p)) + result = PRAGMA_OACC_CLAUSE_WAIT; + break; } } @@ -27730,6 +27773,14 @@ cp_parser_omp_var_list_no_open (cp_parser *parser, enum omp_clause_code kind, { switch (kind) { + case OMP_CLAUSE__CACHE_: + if (cp_lexer_peek_token (parser->lexer)->type != CPP_OPEN_SQUARE) + { + error_at (token->location, "expected %<[%>"); + decl = error_mark_node; + break; + } + /* FALL THROUGH. 
*/ case OMP_CLAUSE_MAP: case OMP_CLAUSE_FROM: case OMP_CLAUSE_TO: @@ -27760,6 +27811,26 @@ cp_parser_omp_var_list_no_open (cp_parser *parser, enum omp_clause_code kind, if (!cp_parser_require (parser, CPP_CLOSE_SQUARE, RT_CLOSE_SQUARE)) goto skip_comma; + + if (kind == OMP_CLAUSE__CACHE_) + { + if (TREE_CODE (low_bound) != INTEGER_CST + && !TREE_READONLY (low_bound)) + { + error_at (token->location, + "%qD is not a constant", low_bound); + decl = error_mark_node; + } + + if (TREE_CODE (length) != INTEGER_CST + && !TREE_READONLY (length)) + { + error_at (token->location, + "%qD is not a constant", length); + decl = error_mark_node; + } + } + decl = tree_cons (low_bound, length, decl); } break; @@ -27822,6 +27893,222 @@ cp_parser_omp_var_list (cp_parser *parser, enum omp_clause_code kind, tree list) return list; } +/* OpenACC 2.0: + copy ( variable-list ) + copyin ( variable-list ) + copyout ( variable-list ) + create ( variable-list ) + delete ( variable-list ) + present ( variable-list ) + present_or_copy ( variable-list ) + pcopy ( variable-list ) + present_or_copyin ( variable-list ) + pcopyin ( variable-list ) + present_or_copyout ( variable-list ) + pcopyout ( variable-list ) + present_or_create ( variable-list ) + pcreate ( variable-list ) */ + +static tree +cp_parser_oacc_data_clause (cp_parser *parser, pragma_omp_clause c_kind, + tree list) +{ + enum gomp_map_kind kind; + switch (c_kind) + { + case PRAGMA_OACC_CLAUSE_COPY: + kind = GOMP_MAP_FORCE_TOFROM; + break; + case PRAGMA_OACC_CLAUSE_COPYIN: + kind = GOMP_MAP_FORCE_TO; + break; + case PRAGMA_OACC_CLAUSE_COPYOUT: + kind = GOMP_MAP_FORCE_FROM; + break; + case PRAGMA_OACC_CLAUSE_CREATE: + kind = GOMP_MAP_FORCE_ALLOC; + break; + case PRAGMA_OACC_CLAUSE_DELETE: + kind = GOMP_MAP_FORCE_DEALLOC; + break; + case PRAGMA_OACC_CLAUSE_DEVICE: + kind = GOMP_MAP_FORCE_TO; + break; + case PRAGMA_OACC_CLAUSE_HOST: + case PRAGMA_OACC_CLAUSE_SELF: + kind = GOMP_MAP_FORCE_FROM; + break; + case PRAGMA_OACC_CLAUSE_PRESENT: 
+ kind = GOMP_MAP_FORCE_PRESENT; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY: + kind = GOMP_MAP_TOFROM; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN: + kind = GOMP_MAP_TO; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT: + kind = GOMP_MAP_FROM; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE: + kind = GOMP_MAP_ALLOC; + break; + default: + gcc_unreachable (); + } + tree nl, c; + nl = cp_parser_omp_var_list (parser, OMP_CLAUSE_MAP, list); + + for (c = nl; c != list; c = OMP_CLAUSE_CHAIN (c)) + OMP_CLAUSE_SET_MAP_KIND (c, kind); + + return nl; +} + +/* OpenACC 2.0: + deviceptr ( variable-list ) */ + +static tree +cp_parser_oacc_data_clause_deviceptr (cp_parser *parser, tree list) +{ + location_t loc = cp_lexer_peek_token (parser->lexer)->location; + tree vars, t; + + /* Can't use OMP_CLAUSE_MAP here (that is, can't use the generic + cp_parser_oacc_data_clause), as for PRAGMA_OACC_CLAUSE_DEVICEPTR, + variable-list must only allow for pointer variables. */ + vars = cp_parser_omp_var_list (parser, OMP_CLAUSE_ERROR, NULL); + for (t = vars; t; t = TREE_CHAIN (t)) + { + tree v = TREE_PURPOSE (t); + + /* FIXME diagnostics: Ideally we should keep individual + locations for all the variables in the var list to make the + following errors more precise. Perhaps + c_parser_omp_var_list_parens should construct a list of + locations to go along with the var list. 
*/ + + if (TREE_CODE (v) != VAR_DECL) + error_at (loc, "%qD is not a variable", v); + else if (TREE_TYPE (v) == error_mark_node) + ; + else if (!POINTER_TYPE_P (TREE_TYPE (v))) + error_at (loc, "%qD is not a pointer variable", v); + + tree u = build_omp_clause (loc, OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (u, GOMP_MAP_FORCE_DEVICEPTR); + OMP_CLAUSE_DECL (u) = v; + OMP_CLAUSE_CHAIN (u) = list; + list = u; + } + + return list; +} + +/* OpenACC: + vector_length ( expression ) */ + +static tree +cp_parser_oacc_clause_vector_length (cp_parser *parser, tree list) +{ + tree t, c; + location_t location = cp_lexer_peek_token (parser->lexer)->location; + bool error = false; + + if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN)) + return list; + + t = cp_parser_condition (parser); + if (t == error_mark_node || !INTEGRAL_TYPE_P (TREE_TYPE (t))) + { + error_at (location, "expected positive integer expression"); + error = true; + } + + if (error || !cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN)) + { + cp_parser_skip_to_closing_parenthesis (parser, /*recovering=*/true, + /*or_comma=*/false, + /*consume_paren=*/true); + return list; + } + + check_no_duplicate_clause (list, OMP_CLAUSE_VECTOR_LENGTH, "vector_length", + location); + + c = build_omp_clause (location, OMP_CLAUSE_VECTOR_LENGTH); + OMP_CLAUSE_VECTOR_LENGTH_EXPR (c) = t; + OMP_CLAUSE_CHAIN (c) = list; + list = c; + + return list; +} + +/* OpenACC 2.0 + Parse wait clause or directive parameters. 
*/
+
+static tree
+cp_parser_oacc_wait_list (cp_parser *parser, location_t clause_loc, tree list)
+{
+  vec<tree, va_gc> *args;
+  tree t, args_tree;
+
+  args = cp_parser_parenthesized_expression_list (parser, non_attr,
+                                                  /*cast_p=*/false,
+                                                  /*allow_expansion_p=*/true,
+                                                  /*non_constant_p=*/NULL);
+
+  if (args == NULL || args->length () == 0)
+    {
+      cp_parser_error (parser, "expected integer expression before ')'");
+      if (args != NULL)
+        release_tree_vector (args);
+      return list;
+    }
+
+  args_tree = build_tree_list_vec (args);
+
+  release_tree_vector (args);
+
+  for (t = args_tree; t; t = TREE_CHAIN (t))
+    {
+      tree targ = TREE_VALUE (t);
+
+      if (targ != error_mark_node)
+        {
+          if (!INTEGRAL_TYPE_P (TREE_TYPE (targ)))
+            error ("%<wait%> expression must be integral");
+          else
+            {
+              tree c = build_omp_clause (clause_loc, OMP_CLAUSE_WAIT);
+
+              mark_rvalue_use (targ);
+              OMP_CLAUSE_DECL (c) = targ;
+              OMP_CLAUSE_CHAIN (c) = list;
+              list = c;
+            }
+        }
+    }
+
+  return list;
+}
+
+/* OpenACC:
+   wait ( int-expr-list ) */
+
+static tree
+cp_parser_oacc_clause_wait (cp_parser *parser, tree list)
+{
+  location_t location = cp_lexer_peek_token (parser->lexer)->location;
+
+  if (cp_lexer_peek_token (parser->lexer)->type != CPP_OPEN_PAREN)
+    return list;
+
+  list = cp_parser_oacc_wait_list (parser, location, list);
+
+  return list;
+}
+
 /* OpenMP 3.0:
    collapse ( constant-expression ) */
 
@@ -28010,6 +28297,42 @@ cp_parser_omp_clause_nowait (cp_parser * /*parser*/,
   return c;
 }
 
+/* OpenACC:
+   num_gangs ( expression ) */
+
+static tree
+cp_parser_omp_clause_num_gangs (cp_parser *parser, tree list)
+{
+  tree t, c;
+  location_t location = cp_lexer_peek_token (parser->lexer)->location;
+
+  if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
+    return list;
+
+  t = cp_parser_condition (parser);
+
+  if (t == error_mark_node
+      || !cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN))
+    cp_parser_skip_to_closing_parenthesis (parser, /*recovering=*/true,
+                                           /*or_comma=*/false,
+                                           /*consume_paren=*/true);
+
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (t)))
+    {
+      error_at (location, "expected positive integer expression");
+      return list;
+    }
+
+  check_no_duplicate_clause (list, OMP_CLAUSE_NUM_GANGS, "num_gangs", location);
+
+  c = build_omp_clause (location, OMP_CLAUSE_NUM_GANGS);
+  OMP_CLAUSE_NUM_GANGS_EXPR (c) = t;
+  OMP_CLAUSE_CHAIN (c) = list;
+  list = c;
+
+  return list;
+}
+
 /* OpenMP 2.5:
    num_threads ( expression ) */
 
@@ -28040,6 +28363,43 @@ cp_parser_omp_clause_num_threads (cp_parser *parser, tree list,
   return c;
 }
 
+/* OpenACC:
+   num_workers ( expression ) */
+
+static tree
+cp_parser_omp_clause_num_workers (cp_parser *parser, tree list)
+{
+  tree t, c;
+  location_t location = cp_lexer_peek_token (parser->lexer)->location;
+
+  if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
+    return list;
+
+  t = cp_parser_condition (parser);
+
+  if (t == error_mark_node
+      || !cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN))
+    cp_parser_skip_to_closing_parenthesis (parser, /*recovering=*/true,
+                                           /*or_comma=*/false,
+                                           /*consume_paren=*/true);
+
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (t)))
+    {
+      error_at (location, "expected positive integer expression");
+      return list;
+    }
+
+  check_no_duplicate_clause (list, OMP_CLAUSE_NUM_WORKERS, "num_workers",
+                             location);
+
+  c = build_omp_clause (location, OMP_CLAUSE_NUM_WORKERS);
+  OMP_CLAUSE_NUM_WORKERS_EXPR (c) = t;
+  OMP_CLAUSE_CHAIN (c) = list;
+  list = c;
+
+  return list;
+}
+
 /* OpenMP 2.5:
    ordered */
 
@@ -28562,7 +28922,7 @@ static tree
 cp_parser_omp_clause_map (cp_parser *parser, tree list)
 {
   tree nlist, c;
-  enum omp_clause_map_kind kind = OMP_CLAUSE_MAP_TOFROM;
+  enum gomp_map_kind kind = GOMP_MAP_TOFROM;
 
   if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
     return list;
@@ -28574,13 +28934,13 @@ cp_parser_omp_clause_map (cp_parser *parser, tree list)
       const char *p = IDENTIFIER_POINTER (id);
 
       if (strcmp ("alloc", p) == 0)
-	kind = OMP_CLAUSE_MAP_ALLOC;
+	kind = GOMP_MAP_ALLOC;
       else if (strcmp ("to", p) == 
0) - kind = OMP_CLAUSE_MAP_TO; + kind = GOMP_MAP_TO; else if (strcmp ("from", p) == 0) - kind = OMP_CLAUSE_MAP_FROM; + kind = GOMP_MAP_FROM; else if (strcmp ("tofrom", p) == 0) - kind = OMP_CLAUSE_MAP_TOFROM; + kind = GOMP_MAP_TOFROM; else { cp_parser_error (parser, "invalid map kind"); @@ -28597,7 +28957,7 @@ cp_parser_omp_clause_map (cp_parser *parser, tree list) NULL); for (c = nlist; c != list; c = OMP_CLAUSE_CHAIN (c)) - OMP_CLAUSE_MAP_KIND (c) = kind; + OMP_CLAUSE_SET_MAP_KIND (c, kind); return nlist; } @@ -28734,6 +29094,178 @@ cp_parser_omp_clause_proc_bind (cp_parser *parser, tree list, return list; } +/* OpenACC: + async [( int-expr )] */ + +static tree +cp_parser_oacc_clause_async (cp_parser *parser, tree list) +{ + tree c, t; + location_t loc = cp_lexer_peek_token (parser->lexer)->location; + + t = build_int_cst (integer_type_node, GOMP_ASYNC_NOVAL); + + if (cp_lexer_peek_token (parser->lexer)->type == CPP_OPEN_PAREN) + { + cp_lexer_consume_token (parser->lexer); + + t = cp_parser_expression (parser); + if (t == error_mark_node + || !cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN)) + cp_parser_skip_to_closing_parenthesis (parser, /*recovering=*/true, + /*or_comma=*/false, + /*consume_paren=*/true); + } + + check_no_duplicate_clause (list, OMP_CLAUSE_ASYNC, "async", loc); + + c = build_omp_clause (loc, OMP_CLAUSE_ASYNC); + OMP_CLAUSE_ASYNC_EXPR (c) = t; + OMP_CLAUSE_CHAIN (c) = list; + list = c; + + return list; +} + +/* Parse all OpenACC clauses. The set clauses allowed by the directive + is a bitmask in MASK. Return the list of clauses found. 
*/ + +static tree +cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask, + const char *where, cp_token *pragma_tok, + bool finish_p = true) +{ + tree clauses = NULL; + bool first = true; + + while (cp_lexer_next_token_is_not (parser->lexer, CPP_PRAGMA_EOL)) + { + location_t here; + pragma_omp_clause c_kind; + const char *c_name; + tree prev = clauses; + + if (!first && cp_lexer_next_token_is (parser->lexer, CPP_COMMA)) + cp_lexer_consume_token (parser->lexer); + + here = cp_lexer_peek_token (parser->lexer)->location; + c_kind = cp_parser_omp_clause_name (parser); + + switch (c_kind) + { + case PRAGMA_OACC_CLAUSE_ASYNC: + clauses = cp_parser_oacc_clause_async (parser, clauses); + c_name = "async"; + break; + case PRAGMA_OACC_CLAUSE_COLLAPSE: + clauses = cp_parser_omp_clause_collapse (parser, clauses, here); + c_name = "collapse"; + break; + case PRAGMA_OACC_CLAUSE_COPY: + clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "copy"; + break; + case PRAGMA_OACC_CLAUSE_COPYIN: + clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "copyin"; + break; + case PRAGMA_OACC_CLAUSE_COPYOUT: + clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "copyout"; + break; + case PRAGMA_OACC_CLAUSE_CREATE: + clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "create"; + break; + case PRAGMA_OACC_CLAUSE_DELETE: + clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "delete"; + break; + case PRAGMA_OACC_CLAUSE_DEVICE: + clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "device"; + break; + case PRAGMA_OACC_CLAUSE_DEVICEPTR: + clauses = cp_parser_oacc_data_clause_deviceptr (parser, clauses); + c_name = "deviceptr"; + break; + case PRAGMA_OACC_CLAUSE_HOST: + clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "host"; + break; + case PRAGMA_OACC_CLAUSE_IF: + clauses = cp_parser_omp_clause_if (parser, clauses, here); + 
c_name = "if"; + break; + case PRAGMA_OACC_CLAUSE_NUM_GANGS: + clauses = cp_parser_omp_clause_num_gangs (parser, clauses); + c_name = "num_gangs"; + break; + case PRAGMA_OACC_CLAUSE_NUM_WORKERS: + clauses = cp_parser_omp_clause_num_workers (parser, clauses); + c_name = "num_workers"; + break; + case PRAGMA_OACC_CLAUSE_PRESENT: + clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "present"; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY: + clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "present_or_copy"; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN: + clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "present_or_copyin"; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT: + clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "present_or_copyout"; + break; + case PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE: + clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "present_or_create"; + break; + case PRAGMA_OACC_CLAUSE_REDUCTION: + clauses = cp_parser_omp_clause_reduction (parser, clauses); + c_name = "reduction"; + break; + case PRAGMA_OACC_CLAUSE_SELF: + clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "self"; + break; + case PRAGMA_OACC_CLAUSE_VECTOR_LENGTH: + clauses = cp_parser_oacc_clause_vector_length (parser, clauses); + c_name = "vector_length"; + break; + case PRAGMA_OACC_CLAUSE_WAIT: + clauses = cp_parser_oacc_clause_wait (parser, clauses); + c_name = "wait"; + break; + default: + cp_parser_error (parser, "expected %<#pragma acc%> clause"); + goto saw_error; + } + + first = false; + + if (((mask >> c_kind) & 1) == 0) + { + /* Remove the invalid clause(s) from the list to avoid + confusing the rest of the compiler. 
*/ + clauses = prev; + error_at (here, "%qs is not valid for %qs", c_name, where); + } + } + + saw_error: + cp_parser_skip_to_pragma_eol (parser, pragma_tok); + + if (finish_p) + return finish_omp_clauses (clauses); + + return clauses; +} + /* Parse all OpenMP clauses. The set clauses allowed by the directive is a bitmask in MASK. Return the list of clauses found; the result of clause default goes in *pdefault. */ @@ -30953,6 +31485,304 @@ cp_parser_omp_target (cp_parser *parser, cp_token *pragma_tok, return true; } +/* OpenACC 2.0: + # pragma acc cache (variable-list) new-line +*/ + +static tree +cp_parser_oacc_cache (cp_parser *parser, cp_token *pragma_tok) +{ + tree stmt, clauses; + + clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE__CACHE_, NULL_TREE); + clauses = finish_omp_clauses (clauses); + + cp_parser_require_pragma_eol (parser, cp_lexer_peek_token (parser->lexer)); + + stmt = make_node (OACC_CACHE); + TREE_TYPE (stmt) = void_type_node; + OACC_CACHE_CLAUSES (stmt) = clauses; + SET_EXPR_LOCATION (stmt, pragma_tok->location); + add_stmt (stmt); + + return stmt; +} + +/* OpenACC 2.0: + # pragma acc data oacc-data-clause[optseq] new-line + structured-block */ + +#define OACC_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPY) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICEPTR) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE)) + +static tree +cp_parser_oacc_data (cp_parser *parser, cp_token *pragma_tok) +{ + tree stmt, clauses, block; + unsigned int save; 
+ + clauses = cp_parser_oacc_all_clauses (parser, OACC_DATA_CLAUSE_MASK, + "#pragma acc data", pragma_tok); + + block = begin_omp_parallel (); + save = cp_parser_begin_omp_structured_block (parser); + cp_parser_statement (parser, NULL_TREE, false, NULL); + cp_parser_end_omp_structured_block (parser, save); + stmt = finish_oacc_data (clauses, block); + return stmt; +} + +/* OpenACC 2.0: + # pragma acc enter data oacc-enter-data-clause[optseq] new-line + + or + + # pragma acc exit data oacc-exit-data-clause[optseq] new-line + + PRAGMA_TOK is the #pragma token; ENTER is true for the enter data form. +*/ + +#define OACC_ENTER_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) ) + +#define OACC_EXIT_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DELETE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) ) + +static tree +cp_parser_oacc_enter_exit_data (cp_parser *parser, cp_token *pragma_tok, + bool enter) +{ + tree stmt, clauses; + + if (cp_lexer_next_token_is (parser->lexer, CPP_PRAGMA_EOL) + || cp_lexer_next_token_is_not (parser->lexer, CPP_NAME)) + { + cp_parser_error (parser, enter + ?
"expected %<data%> in %<#pragma acc enter data%>" + : "expected %<data%> in %<#pragma acc exit data%>"); + cp_parser_skip_to_pragma_eol (parser, pragma_tok); + return NULL_TREE; + } + + const char *p = + IDENTIFIER_POINTER (cp_lexer_peek_token (parser->lexer)->u.value); + if (strcmp (p, "data") != 0) + { + cp_parser_error (parser, "invalid pragma"); + cp_parser_skip_to_pragma_eol (parser, pragma_tok); + return NULL_TREE; + } + + cp_lexer_consume_token (parser->lexer); + + if (enter) + clauses = cp_parser_oacc_all_clauses (parser, OACC_ENTER_DATA_CLAUSE_MASK, + "#pragma acc enter data", pragma_tok); + else + clauses = cp_parser_oacc_all_clauses (parser, OACC_EXIT_DATA_CLAUSE_MASK, + "#pragma acc exit data", pragma_tok); + + if (find_omp_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE) + { + error_at (pragma_tok->location, "%<#pragma acc %s data%> has no data movement clause", + enter ? "enter" : "exit"); + return NULL_TREE; + } + + stmt = enter ? make_node (OACC_ENTER_DATA) : make_node (OACC_EXIT_DATA); + TREE_TYPE (stmt) = void_type_node; + if (enter) + OACC_ENTER_DATA_CLAUSES (stmt) = clauses; + else + OACC_EXIT_DATA_CLAUSES (stmt) = clauses; + SET_EXPR_LOCATION (stmt, pragma_tok->location); + add_stmt (stmt); + return stmt; +} + +/* OpenACC 2.0: + # pragma acc kernels oacc-kernels-clause[optseq] new-line + structured-block */ + +#define OACC_KERNELS_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPY) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICEPTR) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT) \ + |
(OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT)) + +static tree +cp_parser_oacc_kernels (cp_parser *parser, cp_token *pragma_tok) +{ + tree stmt, clauses, block; + unsigned int save; + + clauses = cp_parser_oacc_all_clauses (parser, OACC_KERNELS_CLAUSE_MASK, + "#pragma acc kernels", pragma_tok); + + block = begin_omp_parallel (); + save = cp_parser_begin_omp_structured_block (parser); + cp_parser_statement (parser, NULL_TREE, false, NULL); + cp_parser_end_omp_structured_block (parser, save); + stmt = finish_oacc_kernels (clauses, block); + return stmt; +} + +/* OpenACC 2.0: + # pragma acc loop oacc-loop-clause[optseq] new-line + structured-block */ + +#define OACC_LOOP_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COLLAPSE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_REDUCTION)) + +static tree +cp_parser_oacc_loop (cp_parser *parser, cp_token *pragma_tok) +{ + tree stmt, clauses, block; + int save; + + clauses = cp_parser_oacc_all_clauses (parser, OACC_LOOP_CLAUSE_MASK, + "#pragma acc loop", pragma_tok); + + block = begin_omp_structured_block (); + save = cp_parser_begin_omp_structured_block (parser); + stmt = cp_parser_omp_for_loop (parser, OACC_LOOP, clauses, NULL); + cp_parser_end_omp_structured_block (parser, save); + add_stmt (finish_omp_structured_block (block)); + return stmt; +} + +/* OpenACC 2.0: + # pragma acc parallel oacc-parallel-clause[optseq] new-line + structured-block */ + +#define OACC_PARALLEL_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPY) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICEPTR) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_GANGS) \ + | (OMP_CLAUSE_MASK_1 << 
PRAGMA_OACC_CLAUSE_NUM_WORKERS) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_REDUCTION) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_VECTOR_LENGTH) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT)) + +static tree +cp_parser_oacc_parallel (cp_parser *parser, cp_token *pragma_tok) +{ + tree stmt, clauses, block; + unsigned int save; + + clauses = cp_parser_oacc_all_clauses (parser, OACC_PARALLEL_CLAUSE_MASK, + "#pragma acc parallel", pragma_tok); + + block = begin_omp_parallel (); + save = cp_parser_begin_omp_structured_block (parser); + cp_parser_statement (parser, NULL_TREE, false, NULL); + cp_parser_end_omp_structured_block (parser, save); + stmt = finish_oacc_parallel (clauses, block); + return stmt; +} + +/* OpenACC 2.0: + # pragma acc update oacc-update-clause[optseq] new-line +*/ + +#define OACC_UPDATE_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_HOST) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_SELF) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT)) + +static tree +cp_parser_oacc_update (cp_parser *parser, cp_token *pragma_tok) +{ + tree stmt, clauses; + + clauses = cp_parser_oacc_all_clauses (parser, OACC_UPDATE_CLAUSE_MASK, + "#pragma acc update", pragma_tok); + + if (find_omp_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE) + { + error_at (pragma_tok->location, + "%<#pragma acc update%> must contain at least one " + "% or % clause"); + return NULL_TREE; + } + + stmt = make_node (OACC_UPDATE); + TREE_TYPE (stmt) = void_type_node; + OACC_UPDATE_CLAUSES (stmt) = 
clauses; + SET_EXPR_LOCATION (stmt, pragma_tok->location); + add_stmt (stmt); + return stmt; +} + +/* OpenACC 2.0: + # pragma acc wait [(intseq)] oacc-wait-clause[optseq] new-line + + LOC is the location of the #pragma token. +*/ + +#define OACC_WAIT_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC)) + +static tree +cp_parser_oacc_wait (cp_parser *parser, cp_token *pragma_tok) +{ + tree clauses, list = NULL_TREE, stmt = NULL_TREE; + location_t loc = cp_lexer_peek_token (parser->lexer)->location; + + if (cp_lexer_peek_token (parser->lexer)->type == CPP_OPEN_PAREN) + list = cp_parser_oacc_wait_list (parser, loc, list); + + clauses = cp_parser_oacc_all_clauses (parser, OACC_WAIT_CLAUSE_MASK, + "#pragma acc wait", pragma_tok); + + stmt = c_finish_oacc_wait (loc, list, clauses); + + return stmt; +} + /* OpenMP 4.0: # pragma omp declare simd declare-simd-clauses[optseq] new-line */ @@ -31627,6 +32457,33 @@ cp_parser_omp_construct (cp_parser *parser, cp_token *pragma_tok) switch (pragma_tok->pragma_kind) { + case PRAGMA_OACC_CACHE: + stmt = cp_parser_oacc_cache (parser, pragma_tok); + break; + case PRAGMA_OACC_DATA: + stmt = cp_parser_oacc_data (parser, pragma_tok); + break; + case PRAGMA_OACC_ENTER_DATA: + stmt = cp_parser_oacc_enter_exit_data (parser, pragma_tok, true); + break; + case PRAGMA_OACC_EXIT_DATA: + stmt = cp_parser_oacc_enter_exit_data (parser, pragma_tok, false); + break; + case PRAGMA_OACC_KERNELS: + stmt = cp_parser_oacc_kernels (parser, pragma_tok); + break; + case PRAGMA_OACC_LOOP: + stmt = cp_parser_oacc_loop (parser, pragma_tok); + break; + case PRAGMA_OACC_PARALLEL: + stmt = cp_parser_oacc_parallel (parser, pragma_tok); + break; + case PRAGMA_OACC_UPDATE: + stmt = cp_parser_oacc_update (parser, pragma_tok); + break; + case PRAGMA_OACC_WAIT: + stmt = cp_parser_oacc_wait (parser, pragma_tok); + break; case PRAGMA_OMP_ATOMIC: cp_parser_omp_atomic (parser, pragma_tok); return; @@ -32169,6 +33026,15 @@ cp_parser_pragma (cp_parser *parser, 
enum pragma_context context) cp_parser_omp_declare (parser, pragma_tok, context); return false; + case PRAGMA_OACC_CACHE: + case PRAGMA_OACC_DATA: + case PRAGMA_OACC_ENTER_DATA: + case PRAGMA_OACC_EXIT_DATA: + case PRAGMA_OACC_KERNELS: + case PRAGMA_OACC_PARALLEL: + case PRAGMA_OACC_LOOP: + case PRAGMA_OACC_UPDATE: + case PRAGMA_OACC_WAIT: case PRAGMA_OMP_ATOMIC: case PRAGMA_OMP_CRITICAL: case PRAGMA_OMP_DISTRIBUTE: diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index 15b8d0111a0..915048daf0a 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -66,6 +66,7 @@ along with GCC; see the file COPYING3. If not see #include "omp-low.h" #include "builtins.h" #include "convert.h" +#include "gomp-constants.h" /* There routines provide a modular interface to perform many parsing operations. They may therefore be used during actual parsing, or @@ -4670,7 +4671,7 @@ handle_omp_array_sections (tree c) return false; tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (c2) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_POINTER); if (!cxx_mark_addressable (t)) return false; OMP_CLAUSE_DECL (c2) = t; @@ -4694,7 +4695,7 @@ handle_omp_array_sections (tree c) { tree c3 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (c3) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_SET_MAP_KIND (c3, GOMP_MAP_POINTER); OMP_CLAUSE_DECL (c3) = ptr; OMP_CLAUSE_DECL (c2) = convert_from_reference (ptr); OMP_CLAUSE_SIZE (c3) = size_zero_node; @@ -5571,6 +5572,44 @@ finish_omp_clauses (tree clauses) } break; + case OMP_CLAUSE_ASYNC: + t = OMP_CLAUSE_ASYNC_EXPR (c); + if (t == error_mark_node) + remove = true; + else if (!type_dependent_expression_p (t) + && !INTEGRAL_TYPE_P (TREE_TYPE (t))) + { + error ("% expression must be integral"); + remove = true; + } + else + { + t = mark_rvalue_use (t); + if (!processing_template_decl) + t = fold_build_cleanup_point_expr (TREE_TYPE (t), t); + OMP_CLAUSE_ASYNC_EXPR (c) 
= t; + } + break; + + case OMP_CLAUSE_VECTOR_LENGTH: + t = OMP_CLAUSE_VECTOR_LENGTH_EXPR (c); + t = maybe_convert_cond (t); + if (t == error_mark_node) + remove = true; + else if (!processing_template_decl) + t = fold_build_cleanup_point_expr (TREE_TYPE (t), t); + OMP_CLAUSE_VECTOR_LENGTH_EXPR (c) = t; + break; + + case OMP_CLAUSE_WAIT: + t = OMP_CLAUSE_WAIT_EXPR (c); + if (t == error_mark_node) + remove = true; + else if (!processing_template_decl) + t = fold_build_cleanup_point_expr (TREE_TYPE (t), t); + OMP_CLAUSE_WAIT_EXPR (c) = t; + break; + case OMP_CLAUSE_THREAD_LIMIT: t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c); if (t == error_mark_node) @@ -5721,6 +5760,7 @@ finish_omp_clauses (tree clauses) case OMP_CLAUSE_MAP: case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: + case OMP_CLAUSE__CACHE_: t = OMP_CLAUSE_DECL (c); if (TREE_CODE (t) == TREE_LIST) { @@ -5749,7 +5789,7 @@ finish_omp_clauses (tree clauses) if (processing_template_decl) break; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER) + && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER) break; if (DECL_P (t)) error ("%qD is not a variable in %qs clause", t, @@ -5770,7 +5810,7 @@ finish_omp_clauses (tree clauses) && !cxx_mark_addressable (t)) remove = true; else if (!(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER) + && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER) && !type_dependent_expression_p (t) && !cp_omp_mappable_type ((TREE_CODE (TREE_TYPE (t)) == REFERENCE_TYPE) @@ -6088,6 +6128,60 @@ finish_omp_structured_block (tree block) return do_poplevel (block); } +/* Generate OACC_DATA, with CLAUSES and BLOCK as its compound + statement. LOC is the location of the OACC_DATA. 
*/ + +tree +finish_oacc_data (tree clauses, tree block) +{ + tree stmt; + + block = finish_omp_structured_block (block); + + stmt = make_node (OACC_DATA); + TREE_TYPE (stmt) = void_type_node; + OACC_DATA_CLAUSES (stmt) = clauses; + OACC_DATA_BODY (stmt) = block; + + return add_stmt (stmt); +} + +/* Generate OACC_KERNELS, with CLAUSES and BLOCK as its compound + statement. LOC is the location of the OACC_KERNELS. */ + +tree +finish_oacc_kernels (tree clauses, tree block) +{ + tree stmt; + + block = finish_omp_structured_block (block); + + stmt = make_node (OACC_KERNELS); + TREE_TYPE (stmt) = void_type_node; + OACC_KERNELS_CLAUSES (stmt) = clauses; + OACC_KERNELS_BODY (stmt) = block; + + return add_stmt (stmt); +} + +/* Generate OACC_PARALLEL, with CLAUSES and BLOCK as its compound + statement. LOC is the location of the OACC_PARALLEL. */ + +tree +finish_oacc_parallel (tree clauses, tree block) +{ + tree stmt; + + block = finish_omp_structured_block (block); + + stmt = make_node (OACC_PARALLEL); + TREE_TYPE (stmt) = void_type_node; + OACC_PARALLEL_CLAUSES (stmt) = clauses; + OACC_PARALLEL_BODY (stmt) = block; + + return add_stmt (stmt); +} + /* Similarly, except force the retention of the BLOCK. */ tree diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi index 0e1743733b2..bbafad9f931 100644 --- a/gcc/doc/generic.texi +++ b/gcc/doc/generic.texi @@ -1819,6 +1819,7 @@ There are also several varieties of complex statements. * Jumps:: * Cleanups:: * OpenMP:: +* OpenACC:: @end menu @node Basic Statements @@ -2093,8 +2094,8 @@ variables. @item OMP_FOR -Represents @code{#pragma omp for [clause1 @dots{} clauseN]}. It -has 5 operands: +Represents @code{#pragma omp for [clause1 @dots{} clauseN]}. It has +six operands: Operand @code{OMP_FOR_BODY} contains the loop body. @@ -2184,10 +2185,9 @@ building code (@code{omp-low.c}). 
@item OMP_CONTINUE Similarly, this instruction does not represent an OpenMP -directive, it is used by @code{OMP_FOR} and +directive, it is used by @code{OMP_FOR} (and similar codes) as well as @code{OMP_SECTIONS} to mark the place where the code needs to -loop to the next iteration (in the case of @code{OMP_FOR}) or -the next section (in the case of @code{OMP_SECTIONS}). +loop to the next iteration, or the next section, respectively. In some cases, @code{OMP_CONTINUE} is placed right before @code{OMP_RETURN}. But if there are cleanups that need to @@ -2233,6 +2233,67 @@ compilation. @end table +@node OpenACC +@subsection OpenACC +@tindex OACC_CACHE +@tindex OACC_DATA +@tindex OACC_DECLARE +@tindex OACC_ENTER_DATA +@tindex OACC_EXIT_DATA +@tindex OACC_HOST_DATA +@tindex OACC_KERNELS +@tindex OACC_LOOP +@tindex OACC_PARALLEL +@tindex OACC_UPDATE + +All the statements starting with @code{OACC_} represent directives and +clauses used by the OpenACC API @w{@uref{http://www.openacc.org/}}. + +@table @code +@item OACC_CACHE + +Represents @code{#pragma acc cache (var @dots{})}. + +@item OACC_DATA + +Represents @code{#pragma acc data [clause1 @dots{} clauseN]}. + +@item OACC_DECLARE + +Represents @code{#pragma acc declare [clause1 @dots{} clauseN]}. + +@item OACC_ENTER_DATA + +Represents @code{#pragma acc enter data [clause1 @dots{} clauseN]}. + +@item OACC_EXIT_DATA + +Represents @code{#pragma acc exit data [clause1 @dots{} clauseN]}. + +@item OACC_HOST_DATA + +Represents @code{#pragma acc host_data [clause1 @dots{} clauseN]}. + +@item OACC_KERNELS + +Represents @code{#pragma acc kernels [clause1 @dots{} clauseN]}. + +@item OACC_LOOP + +Represents @code{#pragma acc loop [clause1 @dots{} clauseN]}. + +See the description of the @code{OMP_FOR} code. + +@item OACC_PARALLEL + +Represents @code{#pragma acc parallel [clause1 @dots{} clauseN]}. + +@item OACC_UPDATE + +Represents @code{#pragma acc update [clause1 @dots{} clauseN]}. 
+ +@end table + @c --------------------------------------------------------------------- @c Functions @c --------------------------------------------------------------------- diff --git a/gcc/doc/gimple.texi b/gcc/doc/gimple.texi index 54daf584ae6..543de90c35c 100644 --- a/gcc/doc/gimple.texi +++ b/gcc/doc/gimple.texi @@ -1828,9 +1828,8 @@ Set @code{NAME} to be the name associated with @code{OMP} critical statement @co tree clauses, tree index, tree initial, tree final, tree incr, @ gimple_seq pre_body, enum tree_code omp_for_cond) Build a @code{GIMPLE_OMP_FOR} statement. @code{BODY} is sequence of statements -inside the for loop. @code{CLAUSES}, are any of the @code{OMP} loop -construct's clauses: private, firstprivate, lastprivate, -reductions, ordered, schedule, and nowait. @code{PRE_BODY} is the +inside the for loop. @code{CLAUSES}, are any of the loop +construct's clauses. @code{PRE_BODY} is the sequence of statements that are loop invariant. @code{INDEX} is the index variable. @code{INITIAL} is the initial value of @code{INDEX}. @code{FINAL} is final value of @code{INDEX}. OMP_FOR_COND is the predicate used to diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 96faf0fd5b0..510201acb24 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -168,8 +168,8 @@ in the following sections. 
@gccoptlist{-ansi -std=@var{standard} -fgnu89-inline @gol -aux-info @var{filename} -fallow-parameterless-variadic-functions @gol -fno-asm -fno-builtin -fno-builtin-@var{function} @gol --fhosted -ffreestanding -fopenmp -fopenmp-simd -fms-extensions @gol --fplan9-extensions -trigraphs -traditional -traditional-cpp @gol +-fhosted -ffreestanding -fopenacc -fopenmp -fopenmp-simd @gol +-fms-extensions -fplan9-extensions -trigraphs -traditional -traditional-cpp @gol -fallow-single-precision -fcond-mismatch -flax-vector-conversions @gol -fsigned-bitfields -fsigned-char @gol -funsigned-bitfields -funsigned-char} @@ -1885,6 +1885,20 @@ This is equivalent to @option{-fno-hosted}. @xref{Standards,,Language Standards Supported by GCC}, for details of freestanding and hosted environments. +@item -fopenacc +@opindex fopenacc +@cindex OpenACC accelerator programming +Enable handling of OpenACC directives @code{#pragma acc} in C/C++ and +@code{!$acc} in Fortran. When @option{-fopenacc} is specified, the +compiler generates accelerated code according to the OpenACC Application +Programming Interface v2.0 @w{@uref{http://www.openacc.org/}}. This option +implies @option{-pthread}, and thus is only supported on targets that +have support for @option{-pthread}. + +Note that this is an experimental feature, incomplete, and subject to +change in future versions of GCC. See +@w{@uref{https://gcc.gnu.org/wiki/OpenACC}} for more information. + @item -fopenmp @opindex fopenmp @cindex OpenMP parallel diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index b8b6a06124a..738e55832ae 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1836,6 +1836,9 @@ Target supports Graphite optimizations. @item fixed_point Target supports fixed-point extension to C. +@item fopenacc +Target supports OpenACC via @option{-fopenacc}. + @item fopenmp Target supports OpenMP via @option{-fopenmp}. 
diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index df4a2f36840..d8b72a2d542 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,199 @@ +2015-01-15 Thomas Schwinge + Cesar Philippidis + James Norris + Ilmir Usmanov + Tobias Burnus + + * lang.opt (fopenacc): New option. + * cpp.c (cpp_define_builtins): Conditionally define _OPENACC. + * dump-parse-tree.c (show_omp_node): Split part of it into... + (show_omp_clauses): ... this new function. + (show_omp_node, show_code_node): Handle EXEC_OACC_PARALLEL_LOOP, + EXEC_OACC_PARALLEL, EXEC_OACC_KERNELS_LOOP, EXEC_OACC_KERNELS, + EXEC_OACC_DATA, EXEC_OACC_HOST_DATA, EXEC_OACC_LOOP, + EXEC_OACC_UPDATE, EXEC_OACC_WAIT, EXEC_OACC_CACHE, + EXEC_OACC_ENTER_DATA, EXEC_OACC_EXIT_DATA. + (show_namespace): Update for OpenACC. + * f95-lang.c (DEF_FUNCTION_TYPE_VAR_2, DEF_FUNCTION_TYPE_VAR_8) + (DEF_FUNCTION_TYPE_VAR_12, DEF_GOACC_BUILTIN) + (DEF_GOACC_BUILTIN_COMPILER): New macros. + * types.def (BT_FN_VOID_INT_INT_VAR) + (BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR_INT_INT_VAR) + (BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR): + New function types. + * gfortran.h (gfc_statement): Add ST_OACC_PARALLEL_LOOP, + ST_OACC_END_PARALLEL_LOOP, ST_OACC_PARALLEL, ST_OACC_END_PARALLEL, + ST_OACC_KERNELS, ST_OACC_END_KERNELS, ST_OACC_DATA, + ST_OACC_END_DATA, ST_OACC_HOST_DATA, ST_OACC_END_HOST_DATA, + ST_OACC_LOOP, ST_OACC_END_LOOP, ST_OACC_DECLARE, ST_OACC_UPDATE, + ST_OACC_WAIT, ST_OACC_CACHE, ST_OACC_KERNELS_LOOP, + ST_OACC_END_KERNELS_LOOP, ST_OACC_ENTER_DATA, ST_OACC_EXIT_DATA, + ST_OACC_ROUTINE. + (struct gfc_expr_list): New data type. + (gfc_get_expr_list): New macro. + (gfc_omp_map_op): Add OMP_MAP_FORCE_ALLOC, OMP_MAP_FORCE_DEALLOC, + OMP_MAP_FORCE_TO, OMP_MAP_FORCE_FROM, OMP_MAP_FORCE_TOFROM, + OMP_MAP_FORCE_PRESENT, OMP_MAP_FORCE_DEVICEPTR. + (OMP_LIST_FIRST, OMP_LIST_DEVICE_RESIDENT, OMP_LIST_USE_DEVICE) + (OMP_LIST_CACHE): New enumerators. 
+ (struct gfc_omp_clauses): Add async_expr, gang_expr, worker_expr, + vector_expr, num_gangs_expr, num_workers_expr, vector_length_expr, + wait_list, tile_list, async, gang, worker, vector, seq, + independent, wait, par_auto, gang_static, and loc members. + (struct gfc_namespace): Add oacc_declare_clauses member. + (gfc_exec_op): Add EXEC_OACC_KERNELS_LOOP, + EXEC_OACC_PARALLEL_LOOP, EXEC_OACC_PARALLEL, EXEC_OACC_KERNELS, + EXEC_OACC_DATA, EXEC_OACC_HOST_DATA, EXEC_OACC_LOOP, + EXEC_OACC_UPDATE, EXEC_OACC_WAIT, EXEC_OACC_CACHE, + EXEC_OACC_ENTER_DATA, EXEC_OACC_EXIT_DATA. + (gfc_free_expr_list, gfc_resolve_oacc_directive) + (gfc_resolve_oacc_declare, gfc_resolve_oacc_parallel_loop_blocks) + (gfc_resolve_oacc_blocks): New prototypes. + * match.c (match_exit_cycle): Handle EXEC_OACC_LOOP and + EXEC_OACC_PARALLEL_LOOP. + * match.h (gfc_match_oacc_cache, gfc_match_oacc_wait) + (gfc_match_oacc_update, gfc_match_oacc_declare) + (gfc_match_oacc_loop, gfc_match_oacc_host_data) + (gfc_match_oacc_data, gfc_match_oacc_kernels) + (gfc_match_oacc_kernels_loop, gfc_match_oacc_parallel) + (gfc_match_oacc_parallel_loop, gfc_match_oacc_enter_data) + (gfc_match_oacc_exit_data, gfc_match_oacc_routine): New + prototypes. + * openmp.c: Include "diagnostic.h" and "gomp-constants.h". + (gfc_free_omp_clauses): Update for members added to struct + gfc_omp_clauses. + (gfc_match_omp_clauses): Change mask parameter to uint64_t. Add + openacc parameter. + (resolve_omp_clauses): Add openacc parameter. Update for OpenACC. + (struct fortran_omp_context): Add is_openmp member. + (gfc_resolve_omp_parallel_blocks): Initialize it. + (gfc_resolve_do_iterator): Update for OpenACC. + (gfc_resolve_omp_directive): Call + resolve_omp_directive_inside_oacc_region.
+ (OMP_CLAUSE_PRIVATE, OMP_CLAUSE_FIRSTPRIVATE) + (OMP_CLAUSE_LASTPRIVATE, OMP_CLAUSE_COPYPRIVATE) + (OMP_CLAUSE_SHARED, OMP_CLAUSE_COPYIN, OMP_CLAUSE_REDUCTION) + (OMP_CLAUSE_IF, OMP_CLAUSE_NUM_THREADS, OMP_CLAUSE_SCHEDULE) + (OMP_CLAUSE_DEFAULT, OMP_CLAUSE_ORDERED, OMP_CLAUSE_COLLAPSE) + (OMP_CLAUSE_UNTIED, OMP_CLAUSE_FINAL, OMP_CLAUSE_MERGEABLE) + (OMP_CLAUSE_ALIGNED, OMP_CLAUSE_DEPEND, OMP_CLAUSE_INBRANCH) + (OMP_CLAUSE_LINEAR, OMP_CLAUSE_NOTINBRANCH, OMP_CLAUSE_PROC_BIND) + (OMP_CLAUSE_SAFELEN, OMP_CLAUSE_SIMDLEN, OMP_CLAUSE_UNIFORM) + (OMP_CLAUSE_DEVICE, OMP_CLAUSE_MAP, OMP_CLAUSE_TO) + (OMP_CLAUSE_FROM, OMP_CLAUSE_NUM_TEAMS, OMP_CLAUSE_THREAD_LIMIT) + (OMP_CLAUSE_DIST_SCHEDULE): Use uint64_t. + (OMP_CLAUSE_ASYNC, OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS) + (OMP_CLAUSE_VECTOR_LENGTH, OMP_CLAUSE_COPY, OMP_CLAUSE_COPYOUT) + (OMP_CLAUSE_CREATE, OMP_CLAUSE_PRESENT) + (OMP_CLAUSE_PRESENT_OR_COPY, OMP_CLAUSE_PRESENT_OR_COPYIN) + (OMP_CLAUSE_PRESENT_OR_COPYOUT, OMP_CLAUSE_PRESENT_OR_CREATE) + (OMP_CLAUSE_DEVICEPTR, OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER) + (OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ, OMP_CLAUSE_INDEPENDENT) + (OMP_CLAUSE_USE_DEVICE, OMP_CLAUSE_DEVICE_RESIDENT) + (OMP_CLAUSE_HOST_SELF, OMP_CLAUSE_OACC_DEVICE, OMP_CLAUSE_WAIT) + (OMP_CLAUSE_DELETE, OMP_CLAUSE_AUTO, OMP_CLAUSE_TILE): New macros. + (gfc_match_omp_clauses): Handle those. + (OACC_PARALLEL_CLAUSES, OACC_KERNELS_CLAUSES, OACC_DATA_CLAUSES) + (OACC_LOOP_CLAUSES, OACC_PARALLEL_LOOP_CLAUSES) + (OACC_KERNELS_LOOP_CLAUSES, OACC_HOST_DATA_CLAUSES) + (OACC_DECLARE_CLAUSES, OACC_UPDATE_CLAUSES) + (OACC_ENTER_DATA_CLAUSES, OACC_EXIT_DATA_CLAUSES) + (OACC_WAIT_CLAUSES): New macros. 
+ (gfc_free_expr_list, match_oacc_expr_list, match_oacc_clause_gang) + (gfc_match_omp_map_clause, gfc_match_oacc_parallel_loop) + (gfc_match_oacc_parallel, gfc_match_oacc_kernels_loop) + (gfc_match_oacc_kernels, gfc_match_oacc_data) + (gfc_match_oacc_host_data, gfc_match_oacc_loop) + (gfc_match_oacc_declare, gfc_match_oacc_update) + (gfc_match_oacc_enter_data, gfc_match_oacc_exit_data) + (gfc_match_oacc_wait, gfc_match_oacc_cache) + (gfc_match_oacc_routine, oacc_is_loop) + (resolve_oacc_scalar_int_expr, resolve_oacc_positive_int_expr) + (check_symbol_not_pointer, check_array_not_assumed) + (resolve_oacc_data_clauses, resolve_oacc_deviceptr_clause) + (oacc_compatible_clauses, oacc_is_parallel, oacc_is_kernels) + (omp_code_to_statement, oacc_code_to_statement) + (resolve_oacc_directive_inside_omp_region) + (resolve_omp_directive_inside_oacc_region) + (resolve_oacc_nested_loops, resolve_oacc_params_in_parallel) + (resolve_oacc_loop_blocks, gfc_resolve_oacc_blocks) + (resolve_oacc_loop, resolve_oacc_cache, gfc_resolve_oacc_declare) + (gfc_resolve_oacc_directive): New functions. + * parse.c (next_free): Update for OpenACC. Move some code into... + (verify_token_free): ... this new function. + (next_fixed): Update for OpenACC. Move some code into... + (verify_token_fixed): ... this new function. + (case_executable): Add ST_OACC_UPDATE, ST_OACC_WAIT, + ST_OACC_CACHE, ST_OACC_ENTER_DATA, and ST_OACC_EXIT_DATA. + (case_exec_markers): Add ST_OACC_PARALLEL_LOOP, ST_OACC_PARALLEL, + ST_OACC_KERNELS, ST_OACC_DATA, ST_OACC_HOST_DATA, ST_OACC_LOOP, + ST_OACC_KERNELS_LOOP. + (case_decl): Add ST_OACC_ROUTINE. + (push_state, parse_critical_block, parse_progunit): Update for + OpenACC. 
+ (gfc_ascii_statement): Handle ST_OACC_PARALLEL_LOOP, + ST_OACC_END_PARALLEL_LOOP, ST_OACC_PARALLEL, ST_OACC_END_PARALLEL, + ST_OACC_KERNELS, ST_OACC_END_KERNELS, ST_OACC_KERNELS_LOOP, + ST_OACC_END_KERNELS_LOOP, ST_OACC_DATA, ST_OACC_END_DATA, + ST_OACC_HOST_DATA, ST_OACC_END_HOST_DATA, ST_OACC_LOOP, + ST_OACC_END_LOOP, ST_OACC_DECLARE, ST_OACC_UPDATE, ST_OACC_WAIT, + ST_OACC_CACHE, ST_OACC_ENTER_DATA, ST_OACC_EXIT_DATA, + ST_OACC_ROUTINE. + (verify_st_order, parse_spec): Handle ST_OACC_DECLARE. + (parse_executable): Handle ST_OACC_PARALLEL_LOOP, + ST_OACC_KERNELS_LOOP, ST_OACC_LOOP, ST_OACC_PARALLEL, + ST_OACC_KERNELS, ST_OACC_DATA, ST_OACC_HOST_DATA. + (decode_oacc_directive, parse_oacc_structured_block) + (parse_oacc_loop, is_oacc): New functions. + * parse.h (struct gfc_state_data): Add oacc_declare_clauses + member. + (is_oacc): New prototype. + * resolve.c (gfc_resolve_blocks, gfc_resolve_code): Handle + EXEC_OACC_PARALLEL_LOOP, EXEC_OACC_PARALLEL, + EXEC_OACC_KERNELS_LOOP, EXEC_OACC_KERNELS, EXEC_OACC_DATA, + EXEC_OACC_HOST_DATA, EXEC_OACC_LOOP, EXEC_OACC_UPDATE, + EXEC_OACC_WAIT, EXEC_OACC_CACHE, EXEC_OACC_ENTER_DATA, + EXEC_OACC_EXIT_DATA. + (resolve_codes): Call gfc_resolve_oacc_declare. + * scanner.c (openacc_flag, openacc_locus): New variables. + (skip_free_comments): Update for OpenACC. Move some code into... + (skip_omp_attribute): ... this new function. + (skip_oacc_attribute): New function. + (skip_fixed_comments, gfc_next_char_literal): Update for OpenACC. + * st.c (gfc_free_statement): Handle EXEC_OACC_PARALLEL_LOOP, + EXEC_OACC_PARALLEL, EXEC_OACC_KERNELS_LOOP, EXEC_OACC_KERNELS, + EXEC_OACC_DATA, EXEC_OACC_HOST_DATA, EXEC_OACC_LOOP, + EXEC_OACC_UPDATE, EXEC_OACC_WAIT, EXEC_OACC_CACHE, + EXEC_OACC_ENTER_DATA, EXEC_OACC_EXIT_DATA. + * trans-decl.c (gfc_generate_function_code): Update for OpenACC. + * trans-openmp.c: Include "gomp-constants.h". + (gfc_omp_finish_clause, gfc_trans_omp_clauses): Use GOMP_MAP_* + instead of OMP_CLAUSE_MAP_*. 
Use OMP_CLAUSE_SET_MAP_KIND. + (gfc_trans_omp_clauses): Handle OMP_LIST_USE_DEVICE, + OMP_LIST_DEVICE_RESIDENT, OMP_LIST_CACHE, and OMP_MAP_FORCE_ALLOC, + OMP_MAP_FORCE_DEALLOC, OMP_MAP_FORCE_TO, OMP_MAP_FORCE_FROM, + OMP_MAP_FORCE_TOFROM, OMP_MAP_FORCE_PRESENT, + OMP_MAP_FORCE_DEVICEPTR, and gfc_omp_clauses' async, seq, + independent, wait_list, num_gangs_expr, num_workers_expr, + vector_length_expr, vector, vector_expr, worker, worker_expr, + gang, gang_expr members. + (gfc_trans_omp_do): Handle EXEC_OACC_LOOP. + (gfc_convert_expr_to_tree, gfc_trans_oacc_construct) + (gfc_trans_oacc_executable_directive) + (gfc_trans_oacc_wait_directive, gfc_trans_oacc_combined_directive) + (gfc_trans_oacc_declare, gfc_trans_oacc_directive): New functions. + * trans-stmt.c (gfc_trans_block_construct): Update for OpenACC. + * trans-stmt.h (gfc_trans_oacc_directive, gfc_trans_oacc_declare): + New prototypes. + * trans.c (trans_code): Handle EXEC_OACC_CACHE, EXEC_OACC_WAIT, + EXEC_OACC_UPDATE, EXEC_OACC_LOOP, EXEC_OACC_HOST_DATA, + EXEC_OACC_DATA, EXEC_OACC_KERNELS, EXEC_OACC_KERNELS_LOOP, + EXEC_OACC_PARALLEL, EXEC_OACC_PARALLEL_LOOP, EXEC_OACC_ENTER_DATA, + EXEC_OACC_EXIT_DATA. + * gfortran.texi: Update for OpenACC. + * intrinsic.texi: Likewise. + * invoke.texi: Likewise. 
+ 2015-01-15 Janus Weil PR fortran/58023 diff --git a/gcc/fortran/cpp.c b/gcc/fortran/cpp.c index b30f90e05c5..e239f21b565 100644 --- a/gcc/fortran/cpp.c +++ b/gcc/fortran/cpp.c @@ -179,6 +179,9 @@ cpp_define_builtins (cpp_reader *pfile) cpp_define (pfile, "__GFORTRAN__=1"); cpp_define (pfile, "_LANGUAGE_FORTRAN=1"); + if (flag_openacc) + cpp_define (pfile, "_OPENACC=201306"); + if (flag_openmp) cpp_define (pfile, "_OPENMP=201307"); diff --git a/gcc/fortran/dump-parse-tree.c b/gcc/fortran/dump-parse-tree.c index 6d587c28a19..83ecbaa3d82 100644 --- a/gcc/fortran/dump-parse-tree.c +++ b/gcc/fortran/dump-parse-tree.c @@ -1072,7 +1072,265 @@ show_omp_namelist (int list_type, gfc_omp_namelist *n) } } -/* Show a single OpenMP directive node and everything underneath it + +/* Show OpenMP or OpenACC clauses: print every clause that is set in OMP_CLAUSES to dumpfile. The OMP_LIST_COPYPRIVATE list is not printed here. */ + +static void +show_omp_clauses (gfc_omp_clauses *omp_clauses) +{ + int list_type; + + switch (omp_clauses->cancel) + { + case OMP_CANCEL_UNKNOWN: + break; + case OMP_CANCEL_PARALLEL: + fputs (" PARALLEL", dumpfile); + break; + case OMP_CANCEL_SECTIONS: + fputs (" SECTIONS", dumpfile); + break; + case OMP_CANCEL_DO: + fputs (" DO", dumpfile); + break; + case OMP_CANCEL_TASKGROUP: + fputs (" TASKGROUP", dumpfile); + break; + } + if (omp_clauses->if_expr) + { + fputs (" IF(", dumpfile); + show_expr (omp_clauses->if_expr); + fputc (')', dumpfile); + } + if (omp_clauses->final_expr) + { + fputs (" FINAL(", dumpfile); + show_expr (omp_clauses->final_expr); + fputc (')', dumpfile); + } + if (omp_clauses->num_threads) + { + fputs (" NUM_THREADS(", dumpfile); + show_expr (omp_clauses->num_threads); + fputc (')', dumpfile); + } + if (omp_clauses->async) + { + fputs (" ASYNC", dumpfile); + if (omp_clauses->async_expr) + { + fputc ('(', dumpfile); + show_expr (omp_clauses->async_expr); + fputc (')', dumpfile); + } + } + if (omp_clauses->num_gangs_expr) + { + fputs (" NUM_GANGS(", dumpfile); + show_expr (omp_clauses->num_gangs_expr); + fputc (')', dumpfile); + } + if 
(omp_clauses->num_workers_expr) + { + fputs (" NUM_WORKERS(", dumpfile); + show_expr (omp_clauses->num_workers_expr); + fputc (')', dumpfile); + } + if (omp_clauses->vector_length_expr) + { + fputs (" VECTOR_LENGTH(", dumpfile); + show_expr (omp_clauses->vector_length_expr); + fputc (')', dumpfile); + } + if (omp_clauses->gang) + { + fputs (" GANG", dumpfile); + if (omp_clauses->gang_expr) + { + fputc ('(', dumpfile); + show_expr (omp_clauses->gang_expr); + fputc (')', dumpfile); + } + } + if (omp_clauses->worker) + { + fputs (" WORKER", dumpfile); + if (omp_clauses->worker_expr) + { + fputc ('(', dumpfile); + show_expr (omp_clauses->worker_expr); + fputc (')', dumpfile); + } + } + if (omp_clauses->vector) + { + fputs (" VECTOR", dumpfile); + if (omp_clauses->vector_expr) + { + fputc ('(', dumpfile); + show_expr (omp_clauses->vector_expr); + fputc (')', dumpfile); + } + } + if (omp_clauses->sched_kind != OMP_SCHED_NONE) + { + const char *type; + switch (omp_clauses->sched_kind) + { + case OMP_SCHED_STATIC: type = "STATIC"; break; + case OMP_SCHED_DYNAMIC: type = "DYNAMIC"; break; + case OMP_SCHED_GUIDED: type = "GUIDED"; break; + case OMP_SCHED_RUNTIME: type = "RUNTIME"; break; + case OMP_SCHED_AUTO: type = "AUTO"; break; + default: + gcc_unreachable (); + } + fprintf (dumpfile, " SCHEDULE (%s", type); + if (omp_clauses->chunk_size) + { + fputc (',', dumpfile); + show_expr (omp_clauses->chunk_size); + } + fputc (')', dumpfile); + } + if (omp_clauses->default_sharing != OMP_DEFAULT_UNKNOWN) + { + const char *type; + switch (omp_clauses->default_sharing) + { + case OMP_DEFAULT_NONE: type = "NONE"; break; + case OMP_DEFAULT_PRIVATE: type = "PRIVATE"; break; + case OMP_DEFAULT_SHARED: type = "SHARED"; break; + case OMP_DEFAULT_FIRSTPRIVATE: type = "FIRSTPRIVATE"; break; + default: + gcc_unreachable (); + } + fprintf (dumpfile, " DEFAULT(%s)", type); + } + if (omp_clauses->tile_list) + { + gfc_expr_list *list; + fputs (" TILE(", dumpfile); + for (list = 
omp_clauses->tile_list; list; list = list->next) + { + show_expr (list->expr); + if (list->next) + fputs (", ", dumpfile); + } + fputc (')', dumpfile); + } + if (omp_clauses->wait_list) + { + gfc_expr_list *list; + fputs (" WAIT(", dumpfile); + for (list = omp_clauses->wait_list; list; list = list->next) + { + show_expr (list->expr); + if (list->next) + fputs (", ", dumpfile); + } + fputc (')', dumpfile); + } + if (omp_clauses->seq) + fputs (" SEQ", dumpfile); + if (omp_clauses->independent) + fputs (" INDEPENDENT", dumpfile); + if (omp_clauses->ordered) + fputs (" ORDERED", dumpfile); + if (omp_clauses->untied) + fputs (" UNTIED", dumpfile); + if (omp_clauses->mergeable) + fputs (" MERGEABLE", dumpfile); + if (omp_clauses->collapse) + fprintf (dumpfile, " COLLAPSE(%d)", omp_clauses->collapse); + for (list_type = 0; list_type < OMP_LIST_NUM; list_type++) + if (omp_clauses->lists[list_type] != NULL + && list_type != OMP_LIST_COPYPRIVATE) + { + const char *type = NULL; + switch (list_type) + { + case OMP_LIST_USE_DEVICE: type = "USE_DEVICE"; break; + case OMP_LIST_DEVICE_RESIDENT: type = "DEVICE_RESIDENT"; break; + case OMP_LIST_CACHE: type = ""; break; + case OMP_LIST_PRIVATE: type = "PRIVATE"; break; + case OMP_LIST_FIRSTPRIVATE: type = "FIRSTPRIVATE"; break; + case OMP_LIST_LASTPRIVATE: type = "LASTPRIVATE"; break; + case OMP_LIST_SHARED: type = "SHARED"; break; + case OMP_LIST_COPYIN: type = "COPYIN"; break; + case OMP_LIST_UNIFORM: type = "UNIFORM"; break; + case OMP_LIST_ALIGNED: type = "ALIGNED"; break; + case OMP_LIST_LINEAR: type = "LINEAR"; break; + case OMP_LIST_REDUCTION: type = "REDUCTION"; break; + case OMP_LIST_DEPEND: type = "DEPEND"; break; + default: + gcc_unreachable (); + } + fprintf (dumpfile, " %s(", type); + show_omp_namelist (list_type, omp_clauses->lists[list_type]); + fputc (')', dumpfile); + } + if (omp_clauses->safelen_expr) + { + fputs (" SAFELEN(", dumpfile); + show_expr (omp_clauses->safelen_expr); + fputc (')', dumpfile); + } + if 
(omp_clauses->simdlen_expr) + { + fputs (" SIMDLEN(", dumpfile); + show_expr (omp_clauses->simdlen_expr); + fputc (')', dumpfile); + } + if (omp_clauses->inbranch) + fputs (" INBRANCH", dumpfile); + if (omp_clauses->notinbranch) + fputs (" NOTINBRANCH", dumpfile); + if (omp_clauses->proc_bind != OMP_PROC_BIND_UNKNOWN) + { + const char *type; + switch (omp_clauses->proc_bind) + { + case OMP_PROC_BIND_MASTER: type = "MASTER"; break; + case OMP_PROC_BIND_SPREAD: type = "SPREAD"; break; + case OMP_PROC_BIND_CLOSE: type = "CLOSE"; break; + default: + gcc_unreachable (); + } + fprintf (dumpfile, " PROC_BIND(%s)", type); + } + if (omp_clauses->num_teams) + { + fputs (" NUM_TEAMS(", dumpfile); + show_expr (omp_clauses->num_teams); + fputc (')', dumpfile); + } + if (omp_clauses->device) + { + fputs (" DEVICE(", dumpfile); + show_expr (omp_clauses->device); + fputc (')', dumpfile); + } + if (omp_clauses->thread_limit) + { + fputs (" THREAD_LIMIT(", dumpfile); + show_expr (omp_clauses->thread_limit); + fputc (')', dumpfile); + } + if (omp_clauses->dist_sched_kind != OMP_SCHED_NONE) + { + fprintf (dumpfile, " DIST_SCHEDULE (static"); + if (omp_clauses->dist_chunk_size) + { + fputc (',', dumpfile); + show_expr (omp_clauses->dist_chunk_size); + } + fputc (')', dumpfile); + } +} + +/* Show a single OpenMP or OpenACC directive node and everything underneath it if necessary. 
*/ static void @@ -1080,9 +1338,22 @@ show_omp_node (int level, gfc_code *c) { gfc_omp_clauses *omp_clauses = NULL; const char *name = NULL; + bool is_oacc = false; switch (c->op) { + case EXEC_OACC_PARALLEL_LOOP: name = "PARALLEL LOOP"; is_oacc = true; break; + case EXEC_OACC_PARALLEL: name = "PARALLEL"; is_oacc = true; break; + case EXEC_OACC_KERNELS_LOOP: name = "KERNELS LOOP"; is_oacc = true; break; + case EXEC_OACC_KERNELS: name = "KERNELS"; is_oacc = true; break; + case EXEC_OACC_DATA: name = "DATA"; is_oacc = true; break; + case EXEC_OACC_HOST_DATA: name = "HOST_DATA"; is_oacc = true; break; + case EXEC_OACC_LOOP: name = "LOOP"; is_oacc = true; break; + case EXEC_OACC_UPDATE: name = "UPDATE"; is_oacc = true; break; + case EXEC_OACC_WAIT: name = "WAIT"; is_oacc = true; break; + case EXEC_OACC_CACHE: name = "CACHE"; is_oacc = true; break; + case EXEC_OACC_ENTER_DATA: name = "ENTER DATA"; is_oacc = true; break; + case EXEC_OACC_EXIT_DATA: name = "EXIT DATA"; is_oacc = true; break; case EXEC_OMP_ATOMIC: name = "ATOMIC"; break; case EXEC_OMP_BARRIER: name = "BARRIER"; break; case EXEC_OMP_CANCEL: name = "CANCEL"; break; @@ -1109,9 +1380,21 @@ show_omp_node (int level, gfc_code *c) default: gcc_unreachable (); } - fprintf (dumpfile, "!$OMP %s", name); + fprintf (dumpfile, "!$%s %s", is_oacc ? 
"ACC" : "OMP", name); switch (c->op) { + case EXEC_OACC_PARALLEL_LOOP: + case EXEC_OACC_PARALLEL: + case EXEC_OACC_KERNELS_LOOP: + case EXEC_OACC_KERNELS: + case EXEC_OACC_DATA: + case EXEC_OACC_HOST_DATA: + case EXEC_OACC_LOOP: + case EXEC_OACC_UPDATE: + case EXEC_OACC_WAIT: + case EXEC_OACC_CACHE: + case EXEC_OACC_ENTER_DATA: + case EXEC_OACC_EXIT_DATA: case EXEC_OMP_CANCEL: case EXEC_OMP_CANCELLATION_POINT: case EXEC_OMP_DO: @@ -1148,170 +1431,13 @@ show_omp_node (int level, gfc_code *c) break; } if (omp_clauses) - { - int list_type; - - switch (omp_clauses->cancel) - { - case OMP_CANCEL_UNKNOWN: - break; - case OMP_CANCEL_PARALLEL: - fputs (" PARALLEL", dumpfile); - break; - case OMP_CANCEL_SECTIONS: - fputs (" SECTIONS", dumpfile); - break; - case OMP_CANCEL_DO: - fputs (" DO", dumpfile); - break; - case OMP_CANCEL_TASKGROUP: - fputs (" TASKGROUP", dumpfile); - break; - } - if (omp_clauses->if_expr) - { - fputs (" IF(", dumpfile); - show_expr (omp_clauses->if_expr); - fputc (')', dumpfile); - } - if (omp_clauses->final_expr) - { - fputs (" FINAL(", dumpfile); - show_expr (omp_clauses->final_expr); - fputc (')', dumpfile); - } - if (omp_clauses->num_threads) - { - fputs (" NUM_THREADS(", dumpfile); - show_expr (omp_clauses->num_threads); - fputc (')', dumpfile); - } - if (omp_clauses->sched_kind != OMP_SCHED_NONE) - { - const char *type; - switch (omp_clauses->sched_kind) - { - case OMP_SCHED_STATIC: type = "STATIC"; break; - case OMP_SCHED_DYNAMIC: type = "DYNAMIC"; break; - case OMP_SCHED_GUIDED: type = "GUIDED"; break; - case OMP_SCHED_RUNTIME: type = "RUNTIME"; break; - case OMP_SCHED_AUTO: type = "AUTO"; break; - default: - gcc_unreachable (); - } - fprintf (dumpfile, " SCHEDULE (%s", type); - if (omp_clauses->chunk_size) - { - fputc (',', dumpfile); - show_expr (omp_clauses->chunk_size); - } - fputc (')', dumpfile); - } - if (omp_clauses->default_sharing != OMP_DEFAULT_UNKNOWN) - { - const char *type; - switch (omp_clauses->default_sharing) - { - case 
OMP_DEFAULT_NONE: type = "NONE"; break; - case OMP_DEFAULT_PRIVATE: type = "PRIVATE"; break; - case OMP_DEFAULT_SHARED: type = "SHARED"; break; - case OMP_DEFAULT_FIRSTPRIVATE: type = "FIRSTPRIVATE"; break; - default: - gcc_unreachable (); - } - fprintf (dumpfile, " DEFAULT(%s)", type); - } - if (omp_clauses->ordered) - fputs (" ORDERED", dumpfile); - if (omp_clauses->untied) - fputs (" UNTIED", dumpfile); - if (omp_clauses->mergeable) - fputs (" MERGEABLE", dumpfile); - if (omp_clauses->collapse) - fprintf (dumpfile, " COLLAPSE(%d)", omp_clauses->collapse); - for (list_type = 0; list_type < OMP_LIST_NUM; list_type++) - if (omp_clauses->lists[list_type] != NULL - && list_type != OMP_LIST_COPYPRIVATE) - { - const char *type = NULL; - switch (list_type) - { - case OMP_LIST_PRIVATE: type = "PRIVATE"; break; - case OMP_LIST_FIRSTPRIVATE: type = "FIRSTPRIVATE"; break; - case OMP_LIST_LASTPRIVATE: type = "LASTPRIVATE"; break; - case OMP_LIST_SHARED: type = "SHARED"; break; - case OMP_LIST_COPYIN: type = "COPYIN"; break; - case OMP_LIST_UNIFORM: type = "UNIFORM"; break; - case OMP_LIST_ALIGNED: type = "ALIGNED"; break; - case OMP_LIST_LINEAR: type = "LINEAR"; break; - case OMP_LIST_REDUCTION: type = "REDUCTION"; break; - case OMP_LIST_DEPEND: type = "DEPEND"; break; - default: - gcc_unreachable (); - } - fprintf (dumpfile, " %s(", type); - show_omp_namelist (list_type, omp_clauses->lists[list_type]); - fputc (')', dumpfile); - } - if (omp_clauses->safelen_expr) - { - fputs (" SAFELEN(", dumpfile); - show_expr (omp_clauses->safelen_expr); - fputc (')', dumpfile); - } - if (omp_clauses->simdlen_expr) - { - fputs (" SIMDLEN(", dumpfile); - show_expr (omp_clauses->simdlen_expr); - fputc (')', dumpfile); - } - if (omp_clauses->inbranch) - fputs (" INBRANCH", dumpfile); - if (omp_clauses->notinbranch) - fputs (" NOTINBRANCH", dumpfile); - if (omp_clauses->proc_bind != OMP_PROC_BIND_UNKNOWN) - { - const char *type; - switch (omp_clauses->proc_bind) - { - case 
OMP_PROC_BIND_MASTER: type = "MASTER"; break; - case OMP_PROC_BIND_SPREAD: type = "SPREAD"; break; - case OMP_PROC_BIND_CLOSE: type = "CLOSE"; break; - default: - gcc_unreachable (); - } - fprintf (dumpfile, " PROC_BIND(%s)", type); - } - if (omp_clauses->num_teams) - { - fputs (" NUM_TEAMS(", dumpfile); - show_expr (omp_clauses->num_teams); - fputc (')', dumpfile); - } - if (omp_clauses->device) - { - fputs (" DEVICE(", dumpfile); - show_expr (omp_clauses->device); - fputc (')', dumpfile); - } - if (omp_clauses->thread_limit) - { - fputs (" THREAD_LIMIT(", dumpfile); - show_expr (omp_clauses->thread_limit); - fputc (')', dumpfile); - } - if (omp_clauses->dist_sched_kind != OMP_SCHED_NONE) - { - fprintf (dumpfile, " DIST_SCHEDULE (static"); - if (omp_clauses->dist_chunk_size) - { - fputc (',', dumpfile); - show_expr (omp_clauses->dist_chunk_size); - } - fputc (')', dumpfile); - } - } + show_omp_clauses (omp_clauses); fputc ('\n', dumpfile); + + /* OpenACC executable directives don't have associated blocks. */ + if (c->op == EXEC_OACC_CACHE || c->op == EXEC_OACC_UPDATE + || c->op == EXEC_OACC_ENTER_DATA || c->op == EXEC_OACC_EXIT_DATA) + return; if (c->op == EXEC_OMP_SECTIONS || c->op == EXEC_OMP_PARALLEL_SECTIONS) { gfc_code *d = c->block; @@ -1331,7 +1457,7 @@ show_omp_node (int level, gfc_code *c) return; fputc ('\n', dumpfile); code_indent (level, 0); - fprintf (dumpfile, "!$OMP END %s", name); + fprintf (dumpfile, "!$%s END %s", is_oacc ? 
"ACC" : "OMP", name); if (omp_clauses != NULL) { if (omp_clauses->lists[OMP_LIST_COPYPRIVATE]) @@ -2311,6 +2437,18 @@ show_code_node (int level, gfc_code *c) fprintf (dumpfile, " EOR=%d", dt->eor->value); break; + case EXEC_OACC_PARALLEL_LOOP: + case EXEC_OACC_PARALLEL: + case EXEC_OACC_KERNELS_LOOP: + case EXEC_OACC_KERNELS: + case EXEC_OACC_DATA: + case EXEC_OACC_HOST_DATA: + case EXEC_OACC_LOOP: + case EXEC_OACC_UPDATE: + case EXEC_OACC_WAIT: + case EXEC_OACC_CACHE: + case EXEC_OACC_ENTER_DATA: + case EXEC_OACC_EXIT_DATA: case EXEC_OMP_ATOMIC: case EXEC_OMP_CANCEL: case EXEC_OMP_CANCELLATION_POINT: @@ -2432,6 +2570,14 @@ show_namespace (gfc_namespace *ns) for (eq = ns->equiv; eq; eq = eq->next) show_equiv (eq); + if (ns->oacc_declare_clauses) + { + /* Dump !$ACC DECLARE clauses. */ + show_indent (); + fprintf (dumpfile, "!$ACC DECLARE"); + show_omp_clauses (ns->oacc_declare_clauses); + } + fputc ('\n', dumpfile); show_indent (); fputs ("code:", dumpfile); diff --git a/gcc/fortran/f95-lang.c b/gcc/fortran/f95-lang.c index ff57de0d453..449f01aa8cf 100644 --- a/gcc/fortran/f95-lang.c +++ b/gcc/fortran/f95-lang.c @@ -672,6 +672,11 @@ gfc_init_builtin_functions (void) #define DEF_FUNCTION_TYPE_8(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ ARG6, ARG7, ARG8) NAME, #define DEF_FUNCTION_TYPE_VAR_0(NAME, RETURN) NAME, +#define DEF_FUNCTION_TYPE_VAR_2(NAME, RETURN, ARG1, ARG2) NAME, +#define DEF_FUNCTION_TYPE_VAR_8(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8) NAME, +#define DEF_FUNCTION_TYPE_VAR_12(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8, ARG9, ARG10, ARG11, ARG12) NAME, #define DEF_POINTER_TYPE(NAME, TYPE) NAME, #include "types.def" #undef DEF_PRIMITIVE_TYPE @@ -685,6 +690,9 @@ gfc_init_builtin_functions (void) #undef DEF_FUNCTION_TYPE_7 #undef DEF_FUNCTION_TYPE_8 #undef DEF_FUNCTION_TYPE_VAR_0 +#undef DEF_FUNCTION_TYPE_VAR_2 +#undef DEF_FUNCTION_TYPE_VAR_8 +#undef DEF_FUNCTION_TYPE_VAR_12 #undef DEF_POINTER_TYPE BT_LAST }; 
@@ -1119,6 +1127,42 @@ gfc_init_builtin_functions (void) builtin_types[(int) ENUM] \ = build_varargs_function_type_list (builtin_types[(int) RETURN], \ NULL_TREE); +#define DEF_FUNCTION_TYPE_VAR_2(ENUM, RETURN, ARG1, ARG2) \ + builtin_types[(int) ENUM] \ + = build_varargs_function_type_list (builtin_types[(int) RETURN], \ + builtin_types[(int) ARG1], \ + builtin_types[(int) ARG2], \ + NULL_TREE); +#define DEF_FUNCTION_TYPE_VAR_8(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8) \ + builtin_types[(int) ENUM] \ + = build_varargs_function_type_list (builtin_types[(int) RETURN], \ + builtin_types[(int) ARG1], \ + builtin_types[(int) ARG2], \ + builtin_types[(int) ARG3], \ + builtin_types[(int) ARG4], \ + builtin_types[(int) ARG5], \ + builtin_types[(int) ARG6], \ + builtin_types[(int) ARG7], \ + builtin_types[(int) ARG8], \ + NULL_TREE); +#define DEF_FUNCTION_TYPE_VAR_12(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8, ARG9, ARG10, ARG11, ARG12) \ + builtin_types[(int) ENUM] \ + = build_varargs_function_type_list (builtin_types[(int) RETURN], \ + builtin_types[(int) ARG1], \ + builtin_types[(int) ARG2], \ + builtin_types[(int) ARG3], \ + builtin_types[(int) ARG4], \ + builtin_types[(int) ARG5], \ + builtin_types[(int) ARG6], \ + builtin_types[(int) ARG7], \ + builtin_types[(int) ARG8], \ + builtin_types[(int) ARG9], \ + builtin_types[(int) ARG10], \ + builtin_types[(int) ARG11], \ + builtin_types[(int) ARG12], \ + NULL_TREE); #define DEF_POINTER_TYPE(ENUM, TYPE) \ builtin_types[(int) ENUM] \ = build_pointer_type (builtin_types[(int) TYPE]); @@ -1134,6 +1178,9 @@ gfc_init_builtin_functions (void) #undef DEF_FUNCTION_TYPE_7 #undef DEF_FUNCTION_TYPE_8 #undef DEF_FUNCTION_TYPE_VAR_0 +#undef DEF_FUNCTION_TYPE_VAR_2 +#undef DEF_FUNCTION_TYPE_VAR_8 +#undef DEF_FUNCTION_TYPE_VAR_12 #undef DEF_POINTER_TYPE builtin_types[(int) BT_LAST] = NULL_TREE; @@ -1145,13 +1192,36 @@ gfc_init_builtin_functions (void) #include "../sync-builtins.def" #undef 
DEF_SYNC_BUILTIN + if (flag_openacc) + { +#undef DEF_GOACC_BUILTIN +#define DEF_GOACC_BUILTIN(code, name, type, attr) \ + gfc_define_builtin ("__builtin_" name, builtin_types[type], \ + code, name, attr); +#undef DEF_GOACC_BUILTIN_COMPILER +#define DEF_GOACC_BUILTIN_COMPILER(code, name, type, attr) \ + gfc_define_builtin (name, builtin_types[type], code, name, attr); +#undef DEF_GOMP_BUILTIN +#define DEF_GOMP_BUILTIN(code, name, type, attr) /* ignore */ +#include "../omp-builtins.def" +#undef DEF_GOACC_BUILTIN +#undef DEF_GOACC_BUILTIN_COMPILER +#undef DEF_GOMP_BUILTIN + } + if (flag_openmp || flag_openmp_simd || flag_tree_parallelize_loops) { +#undef DEF_GOACC_BUILTIN +#define DEF_GOACC_BUILTIN(code, name, type, attr) /* ignore */ +#undef DEF_GOACC_BUILTIN_COMPILER +#define DEF_GOACC_BUILTIN_COMPILER(code, name, type, attr) /* ignore */ #undef DEF_GOMP_BUILTIN #define DEF_GOMP_BUILTIN(code, name, type, attr) \ gfc_define_builtin ("__builtin_" name, builtin_types[type], \ code, name, attr); #include "../omp-builtins.def" +#undef DEF_GOACC_BUILTIN +#undef DEF_GOACC_BUILTIN_COMPILER #undef DEF_GOMP_BUILTIN } diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 435fd63983a..4e2089534a6 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -216,6 +216,12 @@ typedef enum ST_WRITE, ST_ASSIGNMENT, ST_POINTER_ASSIGNMENT, ST_SELECT_CASE, ST_SEQUENCE, ST_SIMPLE_IF, ST_STATEMENT_FUNCTION, ST_DERIVED_DECL, ST_LABEL_ASSIGNMENT, ST_ENUM, ST_ENUMERATOR, ST_END_ENUM, ST_SELECT_TYPE, ST_TYPE_IS, ST_CLASS_IS, + ST_OACC_PARALLEL_LOOP, ST_OACC_END_PARALLEL_LOOP, ST_OACC_PARALLEL, + ST_OACC_END_PARALLEL, ST_OACC_KERNELS, ST_OACC_END_KERNELS, ST_OACC_DATA, + ST_OACC_END_DATA, ST_OACC_HOST_DATA, ST_OACC_END_HOST_DATA, ST_OACC_LOOP, + ST_OACC_END_LOOP, ST_OACC_DECLARE, ST_OACC_UPDATE, ST_OACC_WAIT, + ST_OACC_CACHE, ST_OACC_KERNELS_LOOP, ST_OACC_END_KERNELS_LOOP, + ST_OACC_ENTER_DATA, ST_OACC_EXIT_DATA, ST_OACC_ROUTINE, ST_OMP_ATOMIC, ST_OMP_BARRIER, 
ST_OMP_CRITICAL, ST_OMP_END_ATOMIC, ST_OMP_END_CRITICAL, ST_OMP_END_DO, ST_OMP_END_MASTER, ST_OMP_END_ORDERED, ST_OMP_END_PARALLEL, ST_OMP_END_PARALLEL_DO, ST_OMP_END_PARALLEL_SECTIONS, @@ -1067,6 +1073,16 @@ gfc_namelist; #define gfc_get_namelist() XCNEW (gfc_namelist) +/* Likewise to gfc_namelist, but contains expressions. */ +typedef struct gfc_expr_list +{ + struct gfc_expr *expr; + struct gfc_expr_list *next; +} +gfc_expr_list; + +#define gfc_get_expr_list() XCNEW (gfc_expr_list) + typedef enum { OMP_REDUCTION_NONE = -1, @@ -1099,7 +1115,14 @@ typedef enum OMP_MAP_ALLOC, OMP_MAP_TO, OMP_MAP_FROM, - OMP_MAP_TOFROM + OMP_MAP_TOFROM, + OMP_MAP_FORCE_ALLOC, + OMP_MAP_FORCE_DEALLOC, + OMP_MAP_FORCE_TO, + OMP_MAP_FORCE_FROM, + OMP_MAP_FORCE_TOFROM, + OMP_MAP_FORCE_PRESENT, + OMP_MAP_FORCE_DEVICEPTR } gfc_omp_map_op; @@ -1125,7 +1148,8 @@ gfc_omp_namelist; enum { - OMP_LIST_PRIVATE, + OMP_LIST_FIRST, + OMP_LIST_PRIVATE = OMP_LIST_FIRST, OMP_LIST_FIRSTPRIVATE, OMP_LIST_LASTPRIVATE, OMP_LIST_COPYPRIVATE, @@ -1139,6 +1163,9 @@ enum OMP_LIST_TO, OMP_LIST_FROM, OMP_LIST_REDUCTION, + OMP_LIST_DEVICE_RESIDENT, + OMP_LIST_USE_DEVICE, + OMP_LIST_CACHE, OMP_LIST_NUM }; @@ -1202,6 +1229,21 @@ typedef struct gfc_omp_clauses struct gfc_expr *thread_limit; enum gfc_omp_sched_kind dist_sched_kind; struct gfc_expr *dist_chunk_size; + + /* OpenACC. */ + struct gfc_expr *async_expr; + struct gfc_expr *gang_expr; + struct gfc_expr *worker_expr; + struct gfc_expr *vector_expr; + struct gfc_expr *num_gangs_expr; + struct gfc_expr *num_workers_expr; + struct gfc_expr *vector_length_expr; + gfc_expr_list *wait_list; + gfc_expr_list *tile_list; + unsigned async:1, gang:1, worker:1, vector:1, seq:1, independent:1; + unsigned wait:1, par_auto:1, gang_static:1; + locus loc; + } gfc_omp_clauses; @@ -1609,6 +1651,9 @@ typedef struct gfc_namespace this namespace. */ struct gfc_data *data, *old_data; + /* !$ACC DECLARE clauses. 
*/ + gfc_omp_clauses *oacc_declare_clauses; + gfc_charlen *cl_list, *old_cl_list; gfc_dt_list *derived_types; @@ -2276,6 +2321,10 @@ typedef enum EXEC_READ, EXEC_WRITE, EXEC_IOLENGTH, EXEC_TRANSFER, EXEC_DT_END, EXEC_BACKSPACE, EXEC_ENDFILE, EXEC_INQUIRE, EXEC_REWIND, EXEC_FLUSH, EXEC_LOCK, EXEC_UNLOCK, + EXEC_OACC_KERNELS_LOOP, EXEC_OACC_PARALLEL_LOOP, + EXEC_OACC_PARALLEL, EXEC_OACC_KERNELS, EXEC_OACC_DATA, EXEC_OACC_HOST_DATA, + EXEC_OACC_LOOP, EXEC_OACC_UPDATE, EXEC_OACC_WAIT, EXEC_OACC_CACHE, + EXEC_OACC_ENTER_DATA, EXEC_OACC_EXIT_DATA, EXEC_OMP_CRITICAL, EXEC_OMP_DO, EXEC_OMP_FLUSH, EXEC_OMP_MASTER, EXEC_OMP_ORDERED, EXEC_OMP_PARALLEL, EXEC_OMP_PARALLEL_DO, EXEC_OMP_PARALLEL_SECTIONS, EXEC_OMP_PARALLEL_WORKSHARE, @@ -2877,6 +2926,11 @@ void gfc_resolve_omp_declare_simd (gfc_namespace *); void gfc_resolve_omp_udrs (gfc_symtree *); void gfc_omp_save_and_clear_state (struct gfc_omp_saved_state *); void gfc_omp_restore_state (struct gfc_omp_saved_state *); +void gfc_free_expr_list (gfc_expr_list *); +void gfc_resolve_oacc_directive (gfc_code *, gfc_namespace *); +void gfc_resolve_oacc_declare (gfc_namespace *); +void gfc_resolve_oacc_parallel_loop_blocks (gfc_code *, gfc_namespace *); +void gfc_resolve_oacc_blocks (gfc_code *, gfc_namespace *); /* expr.c */ void gfc_free_actual_arglist (gfc_actual_arglist *); diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi index 5cc624a12bc..300b8b8440c 100644 --- a/gcc/fortran/gfortran.texi +++ b/gcc/fortran/gfortran.texi @@ -477,6 +477,10 @@ used on real-world programs. In particular, the supported extensions include OpenMP, Cray-style pointers, and several Fortran 2003 and Fortran 2008 features, including TR 15581. However, it is still under development and has a few remaining rough edges. +There also is initial support for OpenACC. +Note that this is an experimental feature, incomplete, and subject to +change in future versions of GCC. See +@uref{https://gcc.gnu.org/wiki/OpenACC} for more information. 
At present, the GNU Fortran compiler passes the @uref{http://www.fortran-2000.com/ArnaudRecipes/fcvs21_f95.html, @@ -533,6 +537,11 @@ The current status of the support is can be found in the Additionally, the GNU Fortran compilers supports the OpenMP specification (version 4.0, @url{http://openmp.org/@/wp/@/openmp-specifications/}). +There also is initial support for the OpenACC specification (targeting +version 2.0, @uref{http://www.openacc.org/}). +Note that this is an experimental feature, incomplete, and subject to +change in future versions of GCC. See +@uref{https://gcc.gnu.org/wiki/OpenACC} for more information. @node Varying Length Character Strings @subsection Varying Length Character Strings @@ -963,7 +972,8 @@ module. @cindex statement, @code{ISO_FORTRAN_ENV} @code{USE} statement with @code{INTRINSIC} and @code{NON_INTRINSIC} attribute; supported intrinsic modules: @code{ISO_FORTRAN_ENV}, -@code{ISO_C_BINDING}, @code{OMP_LIB} and @code{OMP_LIB_KINDS}. +@code{ISO_C_BINDING}, @code{OMP_LIB} and @code{OMP_LIB_KINDS}, +and @code{OPENACC}. @item Renaming of operators in the @code{USE} statement. @@ -1375,6 +1385,7 @@ without warning. * Cray pointers:: * CONVERT specifier:: * OpenMP:: +* OpenACC:: * Argument list functions:: @end menu @@ -1949,6 +1960,37 @@ to the command line. However, this is not supported by @command{gcc} and thus not recommended. @end itemize +@node OpenACC +@subsection OpenACC +@cindex OpenACC + +OpenACC is an application programming interface (API) that supports +offloading of code to accelerator devices. It consists of a set of +compiler directives, library routines, and environment variables that +influence run-time behavior. + +GNU Fortran strives to be compatible to the +@uref{http://www.openacc.org/, OpenACC Application Programming +Interface v2.0}. 
+ +To enable the processing of the OpenACC directive @code{!$acc} in +free-form source code; the @code{c$acc}, @code{*$acc} and @code{!$acc} +directives in fixed form; the @code{!$} conditional compilation +sentinels in free form; and the @code{c$}, @code{*$} and @code{!$} +sentinels in fixed form, @command{gfortran} needs to be invoked with +the @option{-fopenacc} option. This also arranges for automatic linking of +the GNU Offloading and Multi Processing Runtime Library +@ref{Top,,libgomp,libgomp,GNU Offloading and Multi Processing Runtime +Library}. + +The OpenACC Fortran runtime library routines are provided both in a +form of a Fortran 90 module named @code{openacc} and in a form of a +Fortran @code{include} file named @file{openacc_lib.h}. + +Note that this is an experimental feature, incomplete, and subject to +change in future versions of GCC. See +@uref{https://gcc.gnu.org/wiki/OpenACC} for more information. + @node Argument list functions @subsection Argument list functions @code{%VAL}, @code{%REF} and @code{%LOC} @cindex argument list functions diff --git a/gcc/fortran/intrinsic.texi b/gcc/fortran/intrinsic.texi index 36c70d9b923..06bce150903 100644 --- a/gcc/fortran/intrinsic.texi +++ b/gcc/fortran/intrinsic.texi @@ -13774,6 +13774,7 @@ Fortran 95 elemental function: @ref{IEOR} * ISO_C_BINDING:: * IEEE modules:: * OpenMP Modules OMP_LIB and OMP_LIB_KINDS:: +* OpenACC Module OPENACC:: @end menu @node ISO_FORTRAN_ENV @@ -14018,6 +14019,7 @@ with the following options: @code{-fno-unsafe-math-optimizations -frounding-math -fsignaling-nans}. 
+ @node OpenMP Modules OMP_LIB and OMP_LIB_KINDS @section OpenMP Modules @code{OMP_LIB} and @code{OMP_LIB_KINDS} @table @asis @@ -14074,3 +14076,30 @@ kind @code{omp_proc_bind_kind}: @item @code{omp_proc_bind_close} @item @code{omp_proc_bind_spread} @end table + + + +@node OpenACC Module OPENACC +@section OpenACC Module @code{OPENACC} +@table @asis +@item @emph{Standard}: +OpenACC Application Programming Interface v2.0 +@end table + + +The OpenACC Fortran runtime library routines are provided both in the +form of a Fortran 90 module, named @code{OPENACC}, and in the form of a +Fortran @code{include} file named @file{openacc_lib.h}. The +procedures provided by @code{OPENACC} can be found in the +@ref{Top,,Introduction,libgomp,GNU Offloading and Multi Processing +Runtime Library} manual; the named constants defined in the module +are listed below. + +For details refer to the actual +@uref{http://www.openacc.org/, +OpenACC Application Programming Interface v2.0}. + +@code{OPENACC} provides the scalar default-integer +named constant @code{openacc_version} with a value of the form +@var{yyyymm}, where @var{yyyy} is the year and @var{mm} the month +of the OpenACC version; for OpenACC v2.0 the value is @code{201306}. diff --git a/gcc/fortran/invoke.texi b/gcc/fortran/invoke.texi index 39bc479c2cb..9228c78232f 100644 --- a/gcc/fortran/invoke.texi +++ b/gcc/fortran/invoke.texi @@ -120,7 +120,7 @@ by type. Explanations are in the following sections.
-ffixed-line-length-none -ffree-form -ffree-line-length-@var{n} @gol -ffree-line-length-none -fimplicit-none -finteger-4-integer-8 @gol -fmax-identifier-length -fmodule-private -fno-fixed-form -fno-range-check @gol --fopenmp -freal-4-real-10 -freal-4-real-16 -freal-4-real-8 @gol +-fopenacc -fopenmp -freal-4-real-10 -freal-4-real-16 -freal-4-real-8 @gol -freal-8-real-10 -freal-8-real-16 -freal-8-real-4 -std=@var{std} } @@ -302,6 +302,20 @@ Specify that no implicit typing is allowed, unless overridden by explicit Enable the Cray pointer extension, which provides C-like pointer functionality. +@item -fopenacc +@opindex @code{fopenacc} +@cindex OpenACC +Enable the OpenACC extensions. This includes OpenACC @code{!$acc} +directives in free form and @code{c$acc}, @code{*$acc} and +@code{!$acc} directives in fixed form, @code{!$} conditional +compilation sentinels in free form and @code{c$}, @code{*$} and +@code{!$} sentinels in fixed form, and when linking arranges for the +OpenACC runtime library to be linked in. + +Note that this is an experimental feature, incomplete, and subject to +change in future versions of GCC. See +@w{@uref{https://gcc.gnu.org/wiki/OpenACC}} for more information. + @item -fopenmp @opindex @code{fopenmp} @cindex OpenMP diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt index 530ec979e65..a7a4ed644f5 100644 --- a/gcc/fortran/lang.opt +++ b/gcc/fortran/lang.opt @@ -566,6 +566,10 @@ fmodule-private Fortran Var(flag_module_private) Set default accessibility of module entities to PRIVATE. +fopenacc +Fortran +; Documented in C + fopenmp Fortran ; Documented in C diff --git a/gcc/fortran/match.c b/gcc/fortran/match.c index 2973d5a1c5c..8234c277243 100644 --- a/gcc/fortran/match.c +++ b/gcc/fortran/match.c @@ -2501,7 +2501,9 @@ match_exit_cycle (gfc_statement st, gfc_exec_op op) if (o != NULL) { - gfc_error ("%s statement at %C leaving OpenMP structured block", + gfc_error (is_oacc (p) + ? 
"%s statement at %C leaving OpenACC structured block" + : "%s statement at %C leaving OpenMP structured block", gfc_ascii_statement (st)); return MATCH_ERROR; } @@ -2511,6 +2513,33 @@ match_exit_cycle (gfc_statement st, gfc_exec_op op) if (cnt > 0 && o != NULL && o->state == COMP_OMP_STRUCTURED_BLOCK + && (o->head->op == EXEC_OACC_LOOP + || o->head->op == EXEC_OACC_PARALLEL_LOOP)) + { + int collapse = 1; + gcc_assert (o->head->next != NULL + && (o->head->next->op == EXEC_DO + || o->head->next->op == EXEC_DO_WHILE) + && o->previous != NULL + && o->previous->tail->op == o->head->op); + if (o->previous->tail->ext.omp_clauses != NULL + && o->previous->tail->ext.omp_clauses->collapse > 1) + collapse = o->previous->tail->ext.omp_clauses->collapse; + if (st == ST_EXIT && cnt <= collapse) + { + gfc_error ("EXIT statement at %C terminating !$ACC LOOP loop"); + return MATCH_ERROR; + } + if (st == ST_CYCLE && cnt < collapse) + { + gfc_error ("CYCLE statement at %C to non-innermost collapsed" + " !$ACC LOOP loop"); + return MATCH_ERROR; + } + } + if (cnt > 0 + && o != NULL + && (o->state == COMP_OMP_STRUCTURED_BLOCK) && (o->head->op == EXEC_OMP_DO || o->head->op == EXEC_OMP_PARALLEL_DO || o->head->op == EXEC_OMP_SIMD diff --git a/gcc/fortran/match.h b/gcc/fortran/match.h index 3b49ccdb1fc..96d3ec11f3c 100644 --- a/gcc/fortran/match.h +++ b/gcc/fortran/match.h @@ -122,6 +122,22 @@ gfc_common_head *gfc_get_common (const char *, int); /* openmp.c. */ +/* OpenACC directive matchers. 
*/ +match gfc_match_oacc_cache (void); +match gfc_match_oacc_wait (void); +match gfc_match_oacc_update (void); +match gfc_match_oacc_declare (void); +match gfc_match_oacc_loop (void); +match gfc_match_oacc_host_data (void); +match gfc_match_oacc_data (void); +match gfc_match_oacc_kernels (void); +match gfc_match_oacc_kernels_loop (void); +match gfc_match_oacc_parallel (void); +match gfc_match_oacc_parallel_loop (void); +match gfc_match_oacc_enter_data (void); +match gfc_match_oacc_exit_data (void); +match gfc_match_oacc_routine (void); + /* OpenMP directive matchers. */ match gfc_match_omp_eos (void); match gfc_match_omp_atomic (void); diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c index 65659c006ab..005739bf7c7 100644 --- a/gcc/fortran/openmp.c +++ b/gcc/fortran/openmp.c @@ -27,6 +27,8 @@ along with GCC; see the file COPYING3. If not see #include "match.h" #include "parse.h" #include "hash-set.h" +#include "diagnostic.h" +#include "gomp-constants.h" /* Match an end of OpenMP directive. End of OpenMP directive is optional whitespace, followed by '\n' or comment '!'. */ @@ -76,11 +78,33 @@ gfc_free_omp_clauses (gfc_omp_clauses *c) gfc_free_expr (c->device); gfc_free_expr (c->thread_limit); gfc_free_expr (c->dist_chunk_size); + gfc_free_expr (c->async_expr); + gfc_free_expr (c->gang_expr); + gfc_free_expr (c->worker_expr); + gfc_free_expr (c->vector_expr); + gfc_free_expr (c->num_gangs_expr); + gfc_free_expr (c->num_workers_expr); + gfc_free_expr (c->vector_length_expr); for (i = 0; i < OMP_LIST_NUM; i++) gfc_free_omp_namelist (c->lists[i]); + gfc_free_expr_list (c->wait_list); + gfc_free_expr_list (c->tile_list); free (c); } +/* Free the nodes of expression list LIST. Only the list nodes are + released; the expressions they point to are not freed here. */ +void +gfc_free_expr_list (gfc_expr_list *list) +{ + gfc_expr_list *n; + + for (; list; list = n) + { + n = list->next; + free (list); + } +} + /* Free an !$omp declare simd construct list.
*/ void @@ -287,45 +311,175 @@ cleanup: return MATCH_ERROR; } -#define OMP_CLAUSE_PRIVATE (1U << 0) -#define OMP_CLAUSE_FIRSTPRIVATE (1U << 1) -#define OMP_CLAUSE_LASTPRIVATE (1U << 2) -#define OMP_CLAUSE_COPYPRIVATE (1U << 3) -#define OMP_CLAUSE_SHARED (1U << 4) -#define OMP_CLAUSE_COPYIN (1U << 5) -#define OMP_CLAUSE_REDUCTION (1U << 6) -#define OMP_CLAUSE_IF (1U << 7) -#define OMP_CLAUSE_NUM_THREADS (1U << 8) -#define OMP_CLAUSE_SCHEDULE (1U << 9) -#define OMP_CLAUSE_DEFAULT (1U << 10) -#define OMP_CLAUSE_ORDERED (1U << 11) -#define OMP_CLAUSE_COLLAPSE (1U << 12) -#define OMP_CLAUSE_UNTIED (1U << 13) -#define OMP_CLAUSE_FINAL (1U << 14) -#define OMP_CLAUSE_MERGEABLE (1U << 15) -#define OMP_CLAUSE_ALIGNED (1U << 16) -#define OMP_CLAUSE_DEPEND (1U << 17) -#define OMP_CLAUSE_INBRANCH (1U << 18) -#define OMP_CLAUSE_LINEAR (1U << 19) -#define OMP_CLAUSE_NOTINBRANCH (1U << 20) -#define OMP_CLAUSE_PROC_BIND (1U << 21) -#define OMP_CLAUSE_SAFELEN (1U << 22) -#define OMP_CLAUSE_SIMDLEN (1U << 23) -#define OMP_CLAUSE_UNIFORM (1U << 24) -#define OMP_CLAUSE_DEVICE (1U << 25) -#define OMP_CLAUSE_MAP (1U << 26) -#define OMP_CLAUSE_TO (1U << 27) -#define OMP_CLAUSE_FROM (1U << 28) -#define OMP_CLAUSE_NUM_TEAMS (1U << 29) -#define OMP_CLAUSE_THREAD_LIMIT (1U << 30) -#define OMP_CLAUSE_DIST_SCHEDULE (1U << 31) +static match +match_oacc_expr_list (const char *str, gfc_expr_list **list, + bool allow_asterisk) +{ + gfc_expr_list *head, *tail, *p; + locus old_loc; + gfc_expr *expr; + match m; -/* Match OpenMP directive clauses. 
MASK is a bitmask of + head = tail = NULL; + + old_loc = gfc_current_locus; + + m = gfc_match (str); + if (m != MATCH_YES) + return m; + + for (;;) + { + m = gfc_match_expr (&expr); + if (m == MATCH_YES || allow_asterisk) + { + p = gfc_get_expr_list (); + if (head == NULL) + head = tail = p; + else + { + tail->next = p; + tail = tail->next; + } + if (m == MATCH_YES) + tail->expr = expr; + else if (gfc_match (" *") != MATCH_YES) + goto syntax; + goto next_item; + } + if (m == MATCH_ERROR) + goto cleanup; + goto syntax; + + next_item: + if (gfc_match_char (')') == MATCH_YES) + break; + if (gfc_match_char (',') != MATCH_YES) + goto syntax; + } + + while (*list) + list = &(*list)->next; + + *list = head; + return MATCH_YES; + +syntax: + gfc_error ("Syntax error in OpenACC expression list at %C"); + +cleanup: + gfc_free_expr_list (head); + gfc_current_locus = old_loc; + return MATCH_ERROR; +} + +static match +match_oacc_clause_gang (gfc_omp_clauses *cp) +{ + if (gfc_match_char ('(') != MATCH_YES) + return MATCH_NO; + if (gfc_match (" num :") == MATCH_YES) + { + cp->gang_static = false; + return gfc_match (" %e )", &cp->gang_expr); + } + if (gfc_match (" static :") == MATCH_YES) + { + cp->gang_static = true; + if (gfc_match (" * )") != MATCH_YES) + return gfc_match (" %e )", &cp->gang_expr); + return MATCH_YES; + } + return gfc_match (" %e )", &cp->gang_expr); +} + +#define OMP_CLAUSE_PRIVATE ((uint64_t) 1 << 0) +#define OMP_CLAUSE_FIRSTPRIVATE ((uint64_t) 1 << 1) +#define OMP_CLAUSE_LASTPRIVATE ((uint64_t) 1 << 2) +#define OMP_CLAUSE_COPYPRIVATE ((uint64_t) 1 << 3) +#define OMP_CLAUSE_SHARED ((uint64_t) 1 << 4) +#define OMP_CLAUSE_COPYIN ((uint64_t) 1 << 5) +#define OMP_CLAUSE_REDUCTION ((uint64_t) 1 << 6) +#define OMP_CLAUSE_IF ((uint64_t) 1 << 7) +#define OMP_CLAUSE_NUM_THREADS ((uint64_t) 1 << 8) +#define OMP_CLAUSE_SCHEDULE ((uint64_t) 1 << 9) +#define OMP_CLAUSE_DEFAULT ((uint64_t) 1 << 10) +#define OMP_CLAUSE_ORDERED ((uint64_t) 1 << 11) +#define 
OMP_CLAUSE_COLLAPSE ((uint64_t) 1 << 12) +#define OMP_CLAUSE_UNTIED ((uint64_t) 1 << 13) +#define OMP_CLAUSE_FINAL ((uint64_t) 1 << 14) +#define OMP_CLAUSE_MERGEABLE ((uint64_t) 1 << 15) +#define OMP_CLAUSE_ALIGNED ((uint64_t) 1 << 16) +#define OMP_CLAUSE_DEPEND ((uint64_t) 1 << 17) +#define OMP_CLAUSE_INBRANCH ((uint64_t) 1 << 18) +#define OMP_CLAUSE_LINEAR ((uint64_t) 1 << 19) +#define OMP_CLAUSE_NOTINBRANCH ((uint64_t) 1 << 20) +#define OMP_CLAUSE_PROC_BIND ((uint64_t) 1 << 21) +#define OMP_CLAUSE_SAFELEN ((uint64_t) 1 << 22) +#define OMP_CLAUSE_SIMDLEN ((uint64_t) 1 << 23) +#define OMP_CLAUSE_UNIFORM ((uint64_t) 1 << 24) +#define OMP_CLAUSE_DEVICE ((uint64_t) 1 << 25) +#define OMP_CLAUSE_MAP ((uint64_t) 1 << 26) +#define OMP_CLAUSE_TO ((uint64_t) 1 << 27) +#define OMP_CLAUSE_FROM ((uint64_t) 1 << 28) +#define OMP_CLAUSE_NUM_TEAMS ((uint64_t) 1 << 29) +#define OMP_CLAUSE_THREAD_LIMIT ((uint64_t) 1 << 30) +#define OMP_CLAUSE_DIST_SCHEDULE ((uint64_t) 1 << 31) + +/* OpenACC 2.0 clauses. 
*/ +#define OMP_CLAUSE_ASYNC ((uint64_t) 1 << 32) +#define OMP_CLAUSE_NUM_GANGS ((uint64_t) 1 << 33) +#define OMP_CLAUSE_NUM_WORKERS ((uint64_t) 1 << 34) +#define OMP_CLAUSE_VECTOR_LENGTH ((uint64_t) 1 << 35) +#define OMP_CLAUSE_COPY ((uint64_t) 1 << 36) +#define OMP_CLAUSE_COPYOUT ((uint64_t) 1 << 37) +#define OMP_CLAUSE_CREATE ((uint64_t) 1 << 38) +#define OMP_CLAUSE_PRESENT ((uint64_t) 1 << 39) +#define OMP_CLAUSE_PRESENT_OR_COPY ((uint64_t) 1 << 40) +#define OMP_CLAUSE_PRESENT_OR_COPYIN ((uint64_t) 1 << 41) +#define OMP_CLAUSE_PRESENT_OR_COPYOUT ((uint64_t) 1 << 42) +#define OMP_CLAUSE_PRESENT_OR_CREATE ((uint64_t) 1 << 43) +#define OMP_CLAUSE_DEVICEPTR ((uint64_t) 1 << 44) +#define OMP_CLAUSE_GANG ((uint64_t) 1 << 45) +#define OMP_CLAUSE_WORKER ((uint64_t) 1 << 46) +#define OMP_CLAUSE_VECTOR ((uint64_t) 1 << 47) +#define OMP_CLAUSE_SEQ ((uint64_t) 1 << 48) +#define OMP_CLAUSE_INDEPENDENT ((uint64_t) 1 << 49) +#define OMP_CLAUSE_USE_DEVICE ((uint64_t) 1 << 50) +#define OMP_CLAUSE_DEVICE_RESIDENT ((uint64_t) 1 << 51) +#define OMP_CLAUSE_HOST_SELF ((uint64_t) 1 << 52) +#define OMP_CLAUSE_OACC_DEVICE ((uint64_t) 1 << 53) +#define OMP_CLAUSE_WAIT ((uint64_t) 1 << 54) +#define OMP_CLAUSE_DELETE ((uint64_t) 1 << 55) +#define OMP_CLAUSE_AUTO ((uint64_t) 1 << 56) +#define OMP_CLAUSE_TILE ((uint64_t) 1 << 57) + +/* Helper function for OpenACC and OpenMP clauses involving memory + mapping. */ + +static bool +gfc_match_omp_map_clause (gfc_omp_namelist **list, gfc_omp_map_op map_op) +{ + gfc_omp_namelist **head = NULL; + if (gfc_match_omp_variable_list ("", list, false, NULL, &head, true) + == MATCH_YES) + { + gfc_omp_namelist *n; + for (n = *head; n; n = n->next) + n->u.map_op = map_op; + return true; + } + + return false; +} + +/* Match OpenMP and OpenACC directive clauses. MASK is a bitmask of clauses that are allowed for a particular directive. 
*/ static match -gfc_match_omp_clauses (gfc_omp_clauses **cp, unsigned int mask, - bool first = true, bool needs_space = true) +gfc_match_omp_clauses (gfc_omp_clauses **cp, uint64_t mask, + bool first = true, bool needs_space = true, + bool openacc = false) { gfc_omp_clauses *c = gfc_get_omp_clauses (); locus old_loc; @@ -339,6 +493,56 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, unsigned int mask, needs_space = false; first = false; gfc_gobble_whitespace (); + if ((mask & OMP_CLAUSE_ASYNC) && !c->async) + if (gfc_match ("async") == MATCH_YES) + { + c->async = true; + needs_space = false; + if (gfc_match (" ( %e )", &c->async_expr) != MATCH_YES) + { + c->async_expr = gfc_get_constant_expr (BT_INTEGER, + gfc_default_integer_kind, + &gfc_current_locus); + mpz_set_si (c->async_expr->value.integer, GOMP_ASYNC_NOVAL); + } + continue; + } + if ((mask & OMP_CLAUSE_GANG) && !c->gang) + if (gfc_match ("gang") == MATCH_YES) + { + c->gang = true; + if (match_oacc_clause_gang(c) == MATCH_YES) + needs_space = false; + else + needs_space = true; + continue; + } + if ((mask & OMP_CLAUSE_WORKER) && !c->worker) + if (gfc_match ("worker") == MATCH_YES) + { + c->worker = true; + if (gfc_match (" ( num : %e )", &c->worker_expr) == MATCH_YES + || gfc_match (" ( %e )", &c->worker_expr) == MATCH_YES) + needs_space = false; + else + needs_space = true; + continue; + } + if ((mask & OMP_CLAUSE_VECTOR_LENGTH) && c->vector_length_expr == NULL + && gfc_match ("vector_length ( %e )", &c->vector_length_expr) + == MATCH_YES) + continue; + if ((mask & OMP_CLAUSE_VECTOR) && !c->vector) + if (gfc_match ("vector") == MATCH_YES) + { + c->vector = true; + if (gfc_match (" ( length : %e )", &c->vector_expr) == MATCH_YES + || gfc_match (" ( %e )", &c->vector_expr) == MATCH_YES) + needs_space = false; + else + needs_space = true; + continue; + } if ((mask & OMP_CLAUSE_IF) && c->if_expr == NULL && gfc_match ("if ( %e )", &c->if_expr) == MATCH_YES) continue; @@ -376,11 +580,159 @@ gfc_match_omp_clauses 
(gfc_omp_clauses **cp, unsigned int mask, &c->lists[OMP_LIST_SHARED], true) == MATCH_YES) continue; - if ((mask & OMP_CLAUSE_COPYIN) - && gfc_match_omp_variable_list ("copyin (", - &c->lists[OMP_LIST_COPYIN], true) + if (mask & OMP_CLAUSE_COPYIN) + { + if (openacc) + { + if (gfc_match ("copyin ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_FORCE_TO)) + continue; + } + else if (gfc_match_omp_variable_list ("copyin (", + &c->lists[OMP_LIST_COPYIN], + true) == MATCH_YES) + continue; + } + if ((mask & OMP_CLAUSE_NUM_GANGS) && c->num_gangs_expr == NULL + && gfc_match ("num_gangs ( %e )", &c->num_gangs_expr) == MATCH_YES) + continue; + if ((mask & OMP_CLAUSE_NUM_WORKERS) && c->num_workers_expr == NULL + && gfc_match ("num_workers ( %e )", &c->num_workers_expr) + == MATCH_YES) + continue; + if ((mask & OMP_CLAUSE_COPY) + && gfc_match ("copy ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_FORCE_TOFROM)) + continue; + if ((mask & OMP_CLAUSE_COPYOUT) + && gfc_match ("copyout ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_FORCE_FROM)) + continue; + if ((mask & OMP_CLAUSE_CREATE) + && gfc_match ("create ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_FORCE_ALLOC)) + continue; + if ((mask & OMP_CLAUSE_DELETE) + && gfc_match ("delete ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_FORCE_DEALLOC)) + continue; + if ((mask & OMP_CLAUSE_PRESENT) + && gfc_match ("present ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_FORCE_PRESENT)) + continue; + if ((mask & OMP_CLAUSE_PRESENT_OR_COPY) + && gfc_match ("present_or_copy ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_TOFROM)) + continue; + if ((mask & OMP_CLAUSE_PRESENT_OR_COPY) + && gfc_match ("pcopy ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + 
OMP_MAP_TOFROM)) + continue; + if ((mask & OMP_CLAUSE_PRESENT_OR_COPYIN) + && gfc_match ("present_or_copyin ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_TO)) + continue; + if ((mask & OMP_CLAUSE_PRESENT_OR_COPYIN) + && gfc_match ("pcopyin ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_TO)) + continue; + if ((mask & OMP_CLAUSE_PRESENT_OR_COPYOUT) + && gfc_match ("present_or_copyout ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_FROM)) + continue; + if ((mask & OMP_CLAUSE_PRESENT_OR_COPYOUT) + && gfc_match ("pcopyout ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_FROM)) + continue; + if ((mask & OMP_CLAUSE_PRESENT_OR_CREATE) + && gfc_match ("present_or_create ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_ALLOC)) + continue; + if ((mask & OMP_CLAUSE_PRESENT_OR_CREATE) + && gfc_match ("pcreate ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_ALLOC)) + continue; + if ((mask & OMP_CLAUSE_DEVICEPTR) + && gfc_match ("deviceptr ( ") == MATCH_YES) + { + gfc_omp_namelist **list = &c->lists[OMP_LIST_MAP]; + gfc_omp_namelist **head = NULL; + if (gfc_match_omp_variable_list ("", list, true, NULL, &head, false) + == MATCH_YES) + { + gfc_omp_namelist *n; + for (n = *head; n; n = n->next) + n->u.map_op = OMP_MAP_FORCE_DEVICEPTR; + continue; + } + } + if ((mask & OMP_CLAUSE_USE_DEVICE) + && gfc_match_omp_variable_list ("use_device (", + &c->lists[OMP_LIST_USE_DEVICE], true) == MATCH_YES) continue; + if ((mask & OMP_CLAUSE_DEVICE_RESIDENT) + && gfc_match_omp_variable_list ("device_resident (", + &c->lists[OMP_LIST_DEVICE_RESIDENT], + true) + == MATCH_YES) + continue; + if ((mask & OMP_CLAUSE_OACC_DEVICE) + && gfc_match ("device ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_FORCE_TO)) + continue; + if ((mask & 
OMP_CLAUSE_HOST_SELF) + && (gfc_match ("host ( ") == MATCH_YES + || gfc_match ("self ( ") == MATCH_YES) + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_FORCE_FROM)) + continue; + if ((mask & OMP_CLAUSE_TILE) + && match_oacc_expr_list ("tile (", &c->tile_list, true) == MATCH_YES) + continue; + if ((mask & OMP_CLAUSE_SEQ) && !c->seq + && gfc_match ("seq") == MATCH_YES) + { + c->seq = true; + needs_space = true; + continue; + } + if ((mask & OMP_CLAUSE_INDEPENDENT) && !c->independent + && gfc_match ("independent") == MATCH_YES) + { + c->independent = true; + needs_space = true; + continue; + } + if ((mask & OMP_CLAUSE_AUTO) && !c->par_auto + && gfc_match ("auto") == MATCH_YES) + { + c->par_auto = true; + needs_space = true; + continue; + } + if ((mask & OMP_CLAUSE_WAIT) && !c->wait + && gfc_match ("wait") == MATCH_YES) + { + c->wait = true; + match_oacc_expr_list (" (", &c->wait_list, false); + continue; + } old_loc = gfc_current_locus; if ((mask & OMP_CLAUSE_REDUCTION) && gfc_match ("reduction ( ") == MATCH_YES) @@ -785,6 +1137,352 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, unsigned int mask, return MATCH_YES; } + +#define OACC_PARALLEL_CLAUSES \ + (OMP_CLAUSE_IF | OMP_CLAUSE_ASYNC | OMP_CLAUSE_NUM_GANGS \ + | OMP_CLAUSE_NUM_WORKERS | OMP_CLAUSE_VECTOR_LENGTH | OMP_CLAUSE_REDUCTION \ + | OMP_CLAUSE_COPY | OMP_CLAUSE_COPYIN | OMP_CLAUSE_COPYOUT \ + | OMP_CLAUSE_CREATE | OMP_CLAUSE_PRESENT | OMP_CLAUSE_PRESENT_OR_COPY \ + | OMP_CLAUSE_PRESENT_OR_COPYIN | OMP_CLAUSE_PRESENT_OR_COPYOUT \ + | OMP_CLAUSE_PRESENT_OR_CREATE | OMP_CLAUSE_DEVICEPTR | OMP_CLAUSE_PRIVATE \ + | OMP_CLAUSE_FIRSTPRIVATE | OMP_CLAUSE_DEFAULT | OMP_CLAUSE_WAIT) +#define OACC_KERNELS_CLAUSES \ + (OMP_CLAUSE_IF | OMP_CLAUSE_ASYNC | OMP_CLAUSE_DEVICEPTR \ + | OMP_CLAUSE_COPY | OMP_CLAUSE_COPYIN | OMP_CLAUSE_COPYOUT \ + | OMP_CLAUSE_CREATE | OMP_CLAUSE_PRESENT | OMP_CLAUSE_PRESENT_OR_COPY \ + | OMP_CLAUSE_PRESENT_OR_COPYIN | OMP_CLAUSE_PRESENT_OR_COPYOUT \ + | 
OMP_CLAUSE_PRESENT_OR_CREATE | OMP_CLAUSE_DEFAULT | OMP_CLAUSE_WAIT) +#define OACC_DATA_CLAUSES \ + (OMP_CLAUSE_IF | OMP_CLAUSE_DEVICEPTR | OMP_CLAUSE_COPY \ + | OMP_CLAUSE_COPYIN | OMP_CLAUSE_COPYOUT | OMP_CLAUSE_CREATE \ + | OMP_CLAUSE_PRESENT | OMP_CLAUSE_PRESENT_OR_COPY \ + | OMP_CLAUSE_PRESENT_OR_COPYIN | OMP_CLAUSE_PRESENT_OR_COPYOUT \ + | OMP_CLAUSE_PRESENT_OR_CREATE) +#define OACC_LOOP_CLAUSES \ + (OMP_CLAUSE_COLLAPSE | OMP_CLAUSE_GANG | OMP_CLAUSE_WORKER \ + | OMP_CLAUSE_VECTOR | OMP_CLAUSE_SEQ | OMP_CLAUSE_INDEPENDENT \ + | OMP_CLAUSE_PRIVATE | OMP_CLAUSE_REDUCTION | OMP_CLAUSE_AUTO \ + | OMP_CLAUSE_TILE) +#define OACC_PARALLEL_LOOP_CLAUSES \ + (OACC_LOOP_CLAUSES | OACC_PARALLEL_CLAUSES) +#define OACC_KERNELS_LOOP_CLAUSES \ + (OACC_LOOP_CLAUSES | OACC_KERNELS_CLAUSES) +#define OACC_HOST_DATA_CLAUSES OMP_CLAUSE_USE_DEVICE +#define OACC_DECLARE_CLAUSES \ + (OMP_CLAUSE_COPY | OMP_CLAUSE_COPYIN | OMP_CLAUSE_COPYOUT \ + | OMP_CLAUSE_CREATE | OMP_CLAUSE_DEVICEPTR | OMP_CLAUSE_DEVICE_RESIDENT \ + | OMP_CLAUSE_PRESENT | OMP_CLAUSE_PRESENT_OR_COPY \ + | OMP_CLAUSE_PRESENT_OR_COPYIN | OMP_CLAUSE_PRESENT_OR_COPYOUT \ + | OMP_CLAUSE_PRESENT_OR_CREATE) +#define OACC_UPDATE_CLAUSES \ + (OMP_CLAUSE_IF | OMP_CLAUSE_ASYNC | OMP_CLAUSE_HOST_SELF \ + | OMP_CLAUSE_OACC_DEVICE | OMP_CLAUSE_WAIT) +#define OACC_ENTER_DATA_CLAUSES \ + (OMP_CLAUSE_IF | OMP_CLAUSE_ASYNC | OMP_CLAUSE_WAIT | OMP_CLAUSE_COPYIN \ + | OMP_CLAUSE_CREATE | OMP_CLAUSE_PRESENT_OR_COPYIN \ + | OMP_CLAUSE_PRESENT_OR_CREATE) +#define OACC_EXIT_DATA_CLAUSES \ + (OMP_CLAUSE_IF | OMP_CLAUSE_ASYNC | OMP_CLAUSE_WAIT | OMP_CLAUSE_COPYOUT \ + | OMP_CLAUSE_DELETE) +#define OACC_WAIT_CLAUSES \ + (OMP_CLAUSE_ASYNC) + + +match +gfc_match_oacc_parallel_loop (void) +{ + gfc_omp_clauses *c; + if (gfc_match_omp_clauses (&c, OACC_PARALLEL_LOOP_CLAUSES, false, false, + true) != MATCH_YES) + return MATCH_ERROR; + + new_st.op = EXEC_OACC_PARALLEL_LOOP; + new_st.ext.omp_clauses = c; + return MATCH_YES; +} + + +match 
+gfc_match_oacc_parallel (void) +{ + gfc_omp_clauses *c; + if (gfc_match_omp_clauses (&c, OACC_PARALLEL_CLAUSES, false, false, true) + != MATCH_YES) + return MATCH_ERROR; + + new_st.op = EXEC_OACC_PARALLEL; + new_st.ext.omp_clauses = c; + return MATCH_YES; +} + + +match +gfc_match_oacc_kernels_loop (void) +{ + gfc_omp_clauses *c; + if (gfc_match_omp_clauses (&c, OACC_KERNELS_LOOP_CLAUSES, false, false, + true) != MATCH_YES) + return MATCH_ERROR; + + new_st.op = EXEC_OACC_KERNELS_LOOP; + new_st.ext.omp_clauses = c; + return MATCH_YES; +} + + +match +gfc_match_oacc_kernels (void) +{ + gfc_omp_clauses *c; + if (gfc_match_omp_clauses (&c, OACC_KERNELS_CLAUSES, false, false, true) + != MATCH_YES) + return MATCH_ERROR; + + new_st.op = EXEC_OACC_KERNELS; + new_st.ext.omp_clauses = c; + return MATCH_YES; +} + + +match +gfc_match_oacc_data (void) +{ + gfc_omp_clauses *c; + if (gfc_match_omp_clauses (&c, OACC_DATA_CLAUSES, false, false, true) + != MATCH_YES) + return MATCH_ERROR; + + new_st.op = EXEC_OACC_DATA; + new_st.ext.omp_clauses = c; + return MATCH_YES; +} + + +match +gfc_match_oacc_host_data (void) +{ + gfc_omp_clauses *c; + if (gfc_match_omp_clauses (&c, OACC_HOST_DATA_CLAUSES, false, false, true) + != MATCH_YES) + return MATCH_ERROR; + + new_st.op = EXEC_OACC_HOST_DATA; + new_st.ext.omp_clauses = c; + return MATCH_YES; +} + + +match +gfc_match_oacc_loop (void) +{ + gfc_omp_clauses *c; + if (gfc_match_omp_clauses (&c, OACC_LOOP_CLAUSES, false, false, true) + != MATCH_YES) + return MATCH_ERROR; + + new_st.op = EXEC_OACC_LOOP; + new_st.ext.omp_clauses = c; + return MATCH_YES; +} + + +match +gfc_match_oacc_declare (void) +{ + gfc_omp_clauses *c; + if (gfc_match_omp_clauses (&c, OACC_DECLARE_CLAUSES, false, false, true) + != MATCH_YES) + return MATCH_ERROR; + + new_st.ext.omp_clauses = c; + new_st.ext.omp_clauses->loc = gfc_current_locus; + return MATCH_YES; +} + + +match +gfc_match_oacc_update (void) +{ + gfc_omp_clauses *c; + if (gfc_match_omp_clauses (&c, 
OACC_UPDATE_CLAUSES, false, false, true) + != MATCH_YES) + return MATCH_ERROR; + + new_st.op = EXEC_OACC_UPDATE; + new_st.ext.omp_clauses = c; + return MATCH_YES; +} + + +match +gfc_match_oacc_enter_data (void) +{ + gfc_omp_clauses *c; + if (gfc_match_omp_clauses (&c, OACC_ENTER_DATA_CLAUSES, false, false, true) + != MATCH_YES) + return MATCH_ERROR; + + new_st.op = EXEC_OACC_ENTER_DATA; + new_st.ext.omp_clauses = c; + return MATCH_YES; +} + + +match +gfc_match_oacc_exit_data (void) +{ + gfc_omp_clauses *c; + if (gfc_match_omp_clauses (&c, OACC_EXIT_DATA_CLAUSES, false, false, true) + != MATCH_YES) + return MATCH_ERROR; + + new_st.op = EXEC_OACC_EXIT_DATA; + new_st.ext.omp_clauses = c; + return MATCH_YES; +} + + +match +gfc_match_oacc_wait (void) +{ + gfc_omp_clauses *c = gfc_get_omp_clauses (); + gfc_expr_list *wait_list = NULL, *el; + + match_oacc_expr_list (" (", &wait_list, true); + gfc_match_omp_clauses (&c, OACC_WAIT_CLAUSES, false, false, true); + + if (gfc_match_omp_eos () != MATCH_YES) + { + gfc_error ("Unexpected junk in !$ACC WAIT at %C"); + return MATCH_ERROR; + } + + if (wait_list) + for (el = wait_list; el; el = el->next) + { + if (el->expr == NULL) + { + gfc_error ("Invalid argument to $!ACC WAIT at %L", + &wait_list->expr->where); + return MATCH_ERROR; + } + + if (!gfc_resolve_expr (el->expr) + || el->expr->ts.type != BT_INTEGER || el->expr->rank != 0 + || el->expr->expr_type != EXPR_CONSTANT) + { + gfc_error ("WAIT clause at %L requires a scalar INTEGER expression", + &el->expr->where); + + return MATCH_ERROR; + } + } + c->wait_list = wait_list; + new_st.op = EXEC_OACC_WAIT; + new_st.ext.omp_clauses = c; + return MATCH_YES; +} + + +match +gfc_match_oacc_cache (void) +{ + gfc_omp_clauses *c = gfc_get_omp_clauses (); + match m = gfc_match_omp_variable_list (" (", + &c->lists[OMP_LIST_CACHE], true); + if (m != MATCH_YES) + { + gfc_free_omp_clauses(c); + return m; + } + + if (gfc_current_state() != COMP_DO + && gfc_current_state() != COMP_DO_CONCURRENT) 
+ { + gfc_error ("ACC CACHE directive must be inside of loop %C"); + gfc_free_omp_clauses(c); + return MATCH_ERROR; + } + + new_st.op = EXEC_OACC_CACHE; + new_st.ext.omp_clauses = c; + return MATCH_YES; +} + + +match +gfc_match_oacc_routine (void) +{ + locus old_loc; + gfc_symbol *sym; + match m; + + old_loc = gfc_current_locus; + + m = gfc_match (" ("); + + if (gfc_current_ns->proc_name + && gfc_current_ns->proc_name->attr.if_source == IFSRC_IFBODY + && m == MATCH_YES) + { + gfc_error ("Only the !$ACC ROUTINE form without " + "list is allowed in interface block at %C"); + goto cleanup; + } + + if (m == MATCH_NO + && gfc_current_ns->proc_name + && gfc_match_omp_eos () == MATCH_YES) + { + if (!gfc_add_omp_declare_target (&gfc_current_ns->proc_name->attr, + gfc_current_ns->proc_name->name, + &old_loc)) + goto cleanup; + return MATCH_YES; + } + + if (m != MATCH_YES) + return m; + + /* Scan for a function name. */ + m = gfc_match_symbol (&sym, 0); + + if (m != MATCH_YES) + { + gfc_error ("Syntax error in !$ACC ROUTINE ( NAME ) at %C"); + gfc_current_locus = old_loc; + return MATCH_ERROR; + } + + if (!sym->attr.external && !sym->attr.function && !sym->attr.subroutine) + { + gfc_error ("Syntax error in !$ACC ROUTINE ( NAME ) at %C, invalid" + " function name '%s'", sym->name); + gfc_current_locus = old_loc; + return MATCH_ERROR; + } + + if (gfc_match_char (')') != MATCH_YES) + { + gfc_error ("Syntax error in !$ACC ROUTINE ( NAME ) at %C, expecting" + " ')' after NAME"); + gfc_current_locus = old_loc; + return MATCH_ERROR; + } + + if (gfc_match_omp_eos () != MATCH_YES) + { + gfc_error ("Unexpected junk after !$ACC ROUTINE at %C"); + goto cleanup; + } + return MATCH_YES; + +cleanup: + gfc_current_locus = old_loc; + return MATCH_ERROR; +} + + #define OMP_PARALLEL_CLAUSES \ (OMP_CLAUSE_PRIVATE | OMP_CLAUSE_FIRSTPRIVATE | OMP_CLAUSE_SHARED \ | OMP_CLAUSE_COPYIN | OMP_CLAUSE_REDUCTION | OMP_CLAUSE_IF \ @@ -1916,6 +2614,129 @@ gfc_match_omp_end_single (void) } +static bool 
+oacc_is_loop (gfc_code *code) +{ + return code->op == EXEC_OACC_PARALLEL_LOOP + || code->op == EXEC_OACC_KERNELS_LOOP + || code->op == EXEC_OACC_LOOP; +} + +/* Resolves EXPR and emits an error naming CLAUSE unless it is a scalar + INTEGER expression. */ + +static void +resolve_oacc_scalar_int_expr (gfc_expr *expr, const char *clause) +{ + if (!gfc_resolve_expr (expr) + || expr->ts.type != BT_INTEGER || expr->rank != 0) + gfc_error ("%s clause at %L requires a scalar INTEGER expression", + clause, &expr->where); +} + + +/* As resolve_oacc_scalar_int_expr, and additionally warns when EXPR is an + INTEGER constant that is not positive. */ + +static void +resolve_oacc_positive_int_expr (gfc_expr *expr, const char *clause) +{ + resolve_oacc_scalar_int_expr (expr, clause); + if (expr->expr_type == EXPR_CONSTANT && expr->ts.type == BT_INTEGER + && mpz_sgn(expr->value.integer) <= 0) + gfc_warning ("INTEGER expression of %s clause at %L must be positive", + clause, &expr->where); +} + +/* Emits an error when the symbol is a pointer, Cray pointer or Cray pointee + of derived or polymorphic type. */ + +static void +check_symbol_not_pointer (gfc_symbol *sym, locus loc, const char *name) +{ + if (sym->ts.type == BT_DERIVED && sym->attr.pointer) + gfc_error ("POINTER object '%s' of derived type in %s clause at %L", + sym->name, name, &loc); + if (sym->ts.type == BT_DERIVED && sym->attr.cray_pointer) + gfc_error ("Cray pointer object of derived type '%s' in %s clause at %L", + sym->name, name, &loc); + if (sym->ts.type == BT_DERIVED && sym->attr.cray_pointee) + gfc_error ("Cray pointee object of derived type '%s' in %s clause at %L", + sym->name, name, &loc); + + if ((sym->ts.type == BT_ASSUMED && sym->attr.pointer) + || (sym->ts.type == BT_CLASS && CLASS_DATA (sym) + && CLASS_DATA (sym)->attr.pointer)) + gfc_error ("POINTER object '%s' of polymorphic type in %s clause at %L", + sym->name, name, &loc); + if ((sym->ts.type == BT_ASSUMED && sym->attr.cray_pointer) + || (sym->ts.type == BT_CLASS && CLASS_DATA (sym) + && CLASS_DATA (sym)->attr.cray_pointer)) + gfc_error ("Cray pointer object of polymorphic type '%s' in %s clause at %L", + sym->name, name, &loc); + if ((sym->ts.type == BT_ASSUMED &&
sym->attr.cray_pointee) + || (sym->ts.type == BT_CLASS && CLASS_DATA (sym) + && CLASS_DATA (sym)->attr.cray_pointee)) + gfc_error ("Cray pointee object of polymorphic type '%s' in %s clause at %L", + sym->name, name, &loc); +} + +/* Emits an error when the symbol is an assumed-size or assumed-rank array, + or a deferred-shape pointer array that is not marked contiguous. */ + +static void +check_array_not_assumed (gfc_symbol *sym, locus loc, const char *name) +{ + if (sym->as && sym->as->type == AS_ASSUMED_SIZE) + gfc_error ("Assumed size array '%s' in %s clause at %L", + sym->name, name, &loc); + if (sym->as && sym->as->type == AS_ASSUMED_RANK) + gfc_error ("Assumed rank array '%s' in %s clause at %L", + sym->name, name, &loc); + if (sym->as && sym->as->type == AS_DEFERRED && sym->attr.pointer + && !sym->attr.contiguous) + gfc_error ("Noncontiguous deferred shape array '%s' in %s clause at %L", + sym->name, name, &loc); +} + +/* Diagnoses SYM appearing in clause NAME: emits an error for ALLOCATABLE + objects of derived or polymorphic type, then applies + check_symbol_not_pointer and check_array_not_assumed. */ + +static void +resolve_oacc_data_clauses (gfc_symbol *sym, locus loc, const char *name) +{ + if (sym->ts.type == BT_DERIVED && sym->attr.allocatable) + gfc_error ("ALLOCATABLE object '%s' of derived type in %s clause at %L", + sym->name, name, &loc); + if ((sym->ts.type == BT_ASSUMED && sym->attr.allocatable) + || (sym->ts.type == BT_CLASS && CLASS_DATA (sym) + && CLASS_DATA (sym)->attr.allocatable)) + gfc_error ("ALLOCATABLE object '%s' of polymorphic type " + "in %s clause at %L", sym->name, name, &loc); + check_symbol_not_pointer (sym, loc, name); + check_array_not_assumed (sym, loc, name); +} + +/* Diagnoses SYM appearing in clause NAME: emits an error when it is a + POINTER, Cray pointer, Cray pointee, ALLOCATABLE or VALUE object, then + applies check_array_not_assumed. */ + +static void +resolve_oacc_deviceptr_clause (gfc_symbol *sym, locus loc, const char *name) +{ + if (sym->attr.pointer + || (sym->ts.type == BT_CLASS && CLASS_DATA (sym) + && CLASS_DATA (sym)->attr.class_pointer)) + gfc_error ("POINTER object '%s' in %s clause at %L", + sym->name, name, &loc); + if (sym->attr.cray_pointer + || (sym->ts.type == BT_CLASS && CLASS_DATA (sym) + && CLASS_DATA (sym)->attr.cray_pointer)) + gfc_error ("Cray pointer object '%s' in %s clause at %L", + sym->name, name, &loc); + if (sym->attr.cray_pointee +
|| (sym->ts.type == BT_CLASS && CLASS_DATA (sym) + && CLASS_DATA (sym)->attr.cray_pointee)) + gfc_error ("Cray pointee object '%s' in %s clause at %L", + sym->name, name, &loc); + if (sym->attr.allocatable + || (sym->ts.type == BT_CLASS && CLASS_DATA (sym) + && CLASS_DATA (sym)->attr.allocatable)) + gfc_error ("ALLOCATABLE object '%s' in %s clause at %L", + sym->name, name, &loc); + if (sym->attr.value) + gfc_error ("VALUE object '%s' in %s clause at %L", + sym->name, name, &loc); + check_array_not_assumed (sym, loc, name); +} + + struct resolve_omp_udr_callback_data { gfc_symbol *sym1, *sym2; @@ -2013,19 +2834,45 @@ resolve_omp_udr_clause (gfc_omp_namelist *n, gfc_namespace *ns, return copy; } +/* Returns true if clause in list 'list' is compatible with any + of the clauses in lists [0..list-1]. E.g., a reduction variable may + appear in both reduction and private clauses, so this function + will return true in this case. */ + +static bool +oacc_compatible_clauses (gfc_omp_clauses *clauses, int list, + gfc_symbol *sym, bool openacc) +{ + gfc_omp_namelist *n; + + if (!openacc) + return false; + + if (list != OMP_LIST_REDUCTION) + return false; + + for (n = clauses->lists[OMP_LIST_FIRST]; n; n = n->next) + if (n->sym == sym) + return true; + + return false; +} /* OpenMP directive resolving routines. 
*/ static void resolve_omp_clauses (gfc_code *code, locus *where, - gfc_omp_clauses *omp_clauses, gfc_namespace *ns) + gfc_omp_clauses *omp_clauses, gfc_namespace *ns, + bool openacc = false) { gfc_omp_namelist *n; + gfc_expr_list *el; int list; static const char *clause_names[] = { "PRIVATE", "FIRSTPRIVATE", "LASTPRIVATE", "COPYPRIVATE", "SHARED", "COPYIN", "UNIFORM", "ALIGNED", "LINEAR", "DEPEND", "MAP", - "TO", "FROM", "REDUCTION" }; + "TO", "FROM", "REDUCTION", "DEVICE_RESIDENT", "USE_DEVICE", + "CACHE" }; if (omp_clauses == NULL) return; @@ -2115,12 +2962,13 @@ resolve_omp_clauses (gfc_code *code, locus *where, && list != OMP_LIST_LASTPRIVATE && list != OMP_LIST_ALIGNED && list != OMP_LIST_DEPEND - && list != OMP_LIST_MAP + && (list != OMP_LIST_MAP || openacc) && list != OMP_LIST_FROM && list != OMP_LIST_TO) for (n = omp_clauses->lists[list]; n; n = n->next) { - if (n->sym->mark) + if (n->sym->mark && !oacc_compatible_clauses (omp_clauses, list, + n->sym, openacc)) gfc_error ("Symbol %qs present on multiple clauses at %L", n->sym->name, where); else @@ -2262,53 +3110,64 @@ resolve_omp_clauses (gfc_code *code, locus *where, case OMP_LIST_TO: case OMP_LIST_FROM: for (; n != NULL; n = n->next) - if (n->expr) - { - if (!gfc_resolve_expr (n->expr) - || n->expr->expr_type != EXPR_VARIABLE - || n->expr->ref == NULL - || n->expr->ref->next - || n->expr->ref->type != REF_ARRAY) - gfc_error ("%qs in %s clause at %L is not a proper " - "array section", n->sym->name, name, where); - else if (n->expr->ref->u.ar.codimen) - gfc_error ("Coarrays not supported in %s clause at %L", - name, where); - else - { - int i; - gfc_array_ref *ar = &n->expr->ref->u.ar; - for (i = 0; i < ar->dimen; i++) - if (ar->stride[i]) - { - gfc_error ("Stride should not be specified for " - "array section in %s clause at %L", - name, where); - break; - } - else if (ar->dimen_type[i] != DIMEN_ELEMENT - && ar->dimen_type[i] != DIMEN_RANGE) - { - gfc_error ("%qs in %s clause at %L is not a " - "proper 
array section", - n->sym->name, name, where); - break; - } - else if (list == OMP_LIST_DEPEND - && ar->start[i] - && ar->start[i]->expr_type == EXPR_CONSTANT - && ar->end[i] - && ar->end[i]->expr_type == EXPR_CONSTANT - && mpz_cmp (ar->start[i]->value.integer, - ar->end[i]->value.integer) > 0) - { - gfc_error ("%qs in DEPEND clause at %L is a zero " - "size array section", n->sym->name, - where); - break; - } - } - } + { + if (n->expr) + { + if (!gfc_resolve_expr (n->expr) + || n->expr->expr_type != EXPR_VARIABLE + || n->expr->ref == NULL + || n->expr->ref->next + || n->expr->ref->type != REF_ARRAY) + gfc_error ("%qs in %s clause at %L is not a proper " + "array section", n->sym->name, name, where); + else if (n->expr->ref->u.ar.codimen) + gfc_error ("Coarrays not supported in %s clause at %L", + name, where); + else + { + int i; + gfc_array_ref *ar = &n->expr->ref->u.ar; + for (i = 0; i < ar->dimen; i++) + if (ar->stride[i]) + { + gfc_error ("Stride should not be specified for " + "array section in %s clause at %L", + name, where); + break; + } + else if (ar->dimen_type[i] != DIMEN_ELEMENT + && ar->dimen_type[i] != DIMEN_RANGE) + { + gfc_error ("%qs in %s clause at %L is not a " + "proper array section", + n->sym->name, name, where); + break; + } + else if (list == OMP_LIST_DEPEND + && ar->start[i] + && ar->start[i]->expr_type == EXPR_CONSTANT + && ar->end[i] + && ar->end[i]->expr_type == EXPR_CONSTANT + && mpz_cmp (ar->start[i]->value.integer, + ar->end[i]->value.integer) > 0) + { + gfc_error ("%qs in DEPEND clause at %L is a " + "zero size array section", + n->sym->name, where); + break; + } + } + } + else if (openacc) + { + if (list == OMP_LIST_MAP + && n->u.map_op == OMP_MAP_FORCE_DEVICEPTR) + resolve_oacc_deviceptr_clause (n->sym, *where, name); + else + resolve_oacc_data_clauses (n->sym, *where, name); + } + } + if (list != OMP_LIST_DEPEND) for (n = omp_clauses->lists[list]; n != NULL; n = n->next) { @@ -2346,7 +3205,10 @@ resolve_omp_clauses (gfc_code 
*code, locus *where, gfc_error ("Cray pointer %qs in %s clause at %L", n->sym->name, name, where); } - if (n->sym->as && n->sym->as->type == AS_ASSUMED_SIZE) + if (code + && (oacc_is_loop (code) || code->op == EXEC_OACC_PARALLEL)) + check_array_not_assumed (n->sym, *where, name); + else if (n->sym->as && n->sym->as->type == AS_ASSUMED_SIZE) gfc_error ("Assumed size array %qs in %s clause at %L", n->sym->name, name, where); if (n->sym->attr.in_namelist && list != OMP_LIST_REDUCTION) @@ -2366,6 +3228,7 @@ resolve_omp_clauses (gfc_code *code, locus *where, default: break; } + switch (list) { case OMP_LIST_REDUCTION: @@ -2499,6 +3362,30 @@ resolve_omp_clauses (gfc_code *code, locus *where, to be done here for OMP_LIST_PRIVATE. */ case OMP_LIST_PRIVATE: gcc_assert (code && code->op != EXEC_NOP); + break; + case OMP_LIST_USE_DEVICE: + if (n->sym->attr.allocatable + || (n->sym->ts.type == BT_CLASS && CLASS_DATA (n->sym) + && CLASS_DATA (n->sym)->attr.allocatable)) + gfc_error ("ALLOCATABLE object '%s' in %s clause at %L", + n->sym->name, name, where); + if (n->sym->attr.pointer + || (n->sym->ts.type == BT_CLASS && CLASS_DATA (n->sym) + && CLASS_DATA (n->sym)->attr.class_pointer)) + gfc_error ("POINTER object '%s' in %s clause at %L", + n->sym->name, name, where); + if (n->sym->attr.cray_pointer) + gfc_error ("Cray pointer object '%s' in %s clause at %L", + n->sym->name, name, where); + if (n->sym->attr.cray_pointee) + gfc_error ("Cray pointee object '%s' in %s clause at %L", + n->sym->name, name, where); + /* FALLTHRU */ + case OMP_LIST_DEVICE_RESIDENT: + case OMP_LIST_CACHE: + check_symbol_not_pointer (n->sym, *where, name); + check_array_not_assumed (n->sym, *where, name); + break; default: break; } @@ -2554,6 +3441,25 @@ resolve_omp_clauses (gfc_code *code, locus *where, gfc_error ("THREAD_LIMIT clause at %L requires a scalar " "INTEGER expression", &expr->where); } + if (omp_clauses->async) + if (omp_clauses->async_expr) + resolve_oacc_scalar_int_expr 
(omp_clauses->async_expr, "ASYNC"); + if (omp_clauses->num_gangs_expr) + resolve_oacc_positive_int_expr (omp_clauses->num_gangs_expr, "NUM_GANGS"); + if (omp_clauses->num_workers_expr) + resolve_oacc_positive_int_expr (omp_clauses->num_workers_expr, "NUM_WORKERS"); + if (omp_clauses->vector_length_expr) + resolve_oacc_positive_int_expr (omp_clauses->vector_length_expr, "VECTOR_LENGTH"); + if (omp_clauses->gang_expr) + resolve_oacc_positive_int_expr (omp_clauses->gang_expr, "GANG"); + if (omp_clauses->worker_expr) + resolve_oacc_positive_int_expr (omp_clauses->worker_expr, "WORKER"); + if (omp_clauses->vector_expr) + resolve_oacc_positive_int_expr (omp_clauses->vector_expr, "VECTOR"); + if (omp_clauses->wait) + if (omp_clauses->wait_list) + for (el = omp_clauses->wait_list; el; el = el->next) + resolve_oacc_scalar_int_expr (el->expr, "WAIT"); } @@ -3021,6 +3927,7 @@ struct fortran_omp_context hash_set *sharing_clauses; hash_set *private_iterators; struct fortran_omp_context *previous; + bool is_openmp; } *omp_current_ctx; static gfc_code *omp_current_do_code; static int omp_current_do_collapse; @@ -3065,6 +3972,7 @@ gfc_resolve_omp_parallel_blocks (gfc_code *code, gfc_namespace *ns) ctx.sharing_clauses = new hash_set; ctx.private_iterators = new hash_set; ctx.previous = omp_current_ctx; + ctx.is_openmp = true; omp_current_ctx = &ctx; for (list = 0; list < OMP_LIST_NUM; list++) @@ -3159,7 +4067,12 @@ gfc_resolve_do_iterator (gfc_code *code, gfc_symbol *sym) if (omp_current_ctx == NULL) return; - if (omp_current_ctx->sharing_clauses->contains (sym)) + /* An openacc context may represent a data clause. Abort if so. */ + if (!omp_current_ctx->is_openmp && !oacc_is_loop (omp_current_ctx->code)) + return; + + if (omp_current_ctx->is_openmp + && omp_current_ctx->sharing_clauses->contains (sym)) return; if (! 
omp_current_ctx->private_iterators->add (sym)) @@ -3340,6 +4253,428 @@ resolve_omp_do (gfc_code *code) } } +static bool +oacc_is_parallel (gfc_code *code) +{ + return code->op == EXEC_OACC_PARALLEL || code->op == EXEC_OACC_PARALLEL_LOOP; +} + +static bool +oacc_is_kernels (gfc_code *code) +{ + return code->op == EXEC_OACC_KERNELS || code->op == EXEC_OACC_KERNELS_LOOP; +} + +static gfc_statement +omp_code_to_statement (gfc_code *code) +{ + switch (code->op) + { + case EXEC_OMP_PARALLEL: + return ST_OMP_PARALLEL; + case EXEC_OMP_PARALLEL_SECTIONS: + return ST_OMP_PARALLEL_SECTIONS; + case EXEC_OMP_SECTIONS: + return ST_OMP_SECTIONS; + case EXEC_OMP_ORDERED: + return ST_OMP_ORDERED; + case EXEC_OMP_CRITICAL: + return ST_OMP_CRITICAL; + case EXEC_OMP_MASTER: + return ST_OMP_MASTER; + case EXEC_OMP_SINGLE: + return ST_OMP_SINGLE; + case EXEC_OMP_TASK: + return ST_OMP_TASK; + case EXEC_OMP_WORKSHARE: + return ST_OMP_WORKSHARE; + case EXEC_OMP_PARALLEL_WORKSHARE: + return ST_OMP_PARALLEL_WORKSHARE; + case EXEC_OMP_DO: + return ST_OMP_DO; + default: + gcc_unreachable (); + } +} + +static gfc_statement +oacc_code_to_statement (gfc_code *code) +{ + switch (code->op) + { + case EXEC_OACC_PARALLEL: + return ST_OACC_PARALLEL; + case EXEC_OACC_KERNELS: + return ST_OACC_KERNELS; + case EXEC_OACC_DATA: + return ST_OACC_DATA; + case EXEC_OACC_HOST_DATA: + return ST_OACC_HOST_DATA; + case EXEC_OACC_PARALLEL_LOOP: + return ST_OACC_PARALLEL_LOOP; + case EXEC_OACC_KERNELS_LOOP: + return ST_OACC_KERNELS_LOOP; + case EXEC_OACC_LOOP: + return ST_OACC_LOOP; + default: + gcc_unreachable (); + } +} + +static void +resolve_oacc_directive_inside_omp_region (gfc_code *code) +{ + if (omp_current_ctx != NULL && omp_current_ctx->is_openmp) + { + gfc_statement st = omp_code_to_statement (omp_current_ctx->code); + gfc_statement oacc_st = oacc_code_to_statement (code); + gfc_error ("The %s directive cannot be specified within " + "a %s region at %L", gfc_ascii_statement (oacc_st), + 
gfc_ascii_statement (st), &code->loc); + } +} + +static void +resolve_omp_directive_inside_oacc_region (gfc_code *code) +{ + if (omp_current_ctx != NULL && !omp_current_ctx->is_openmp) + { + gfc_statement st = oacc_code_to_statement (omp_current_ctx->code); + gfc_statement omp_st = omp_code_to_statement (code); + gfc_error ("The %s directive cannot be specified within " + "a %s region at %L", gfc_ascii_statement (omp_st), + gfc_ascii_statement (st), &code->loc); + } +} + + +static void +resolve_oacc_nested_loops (gfc_code *code, gfc_code* do_code, int collapse, + const char *clause) +{ + gfc_symbol *dovar; + gfc_code *c; + int i; + + for (i = 1; i <= collapse; i++) + { + if (do_code->op == EXEC_DO_WHILE) + { + gfc_error ("!$ACC LOOP cannot be a DO WHILE or DO without loop control " + "at %L", &do_code->loc); + break; + } + gcc_assert (do_code->op == EXEC_DO || do_code->op == EXEC_DO_CONCURRENT); + if (do_code->ext.iterator->var->ts.type != BT_INTEGER) + gfc_error ("!$ACC LOOP iteration variable must be of type integer at %L", + &do_code->loc); + dovar = do_code->ext.iterator->var->symtree->n.sym; + if (i > 1) + { + gfc_code *do_code2 = code->block->next; + int j; + + for (j = 1; j < i; j++) + { + gfc_symbol *ivar = do_code2->ext.iterator->var->symtree->n.sym; + if (dovar == ivar + || gfc_find_sym_in_expr (ivar, do_code->ext.iterator->start) + || gfc_find_sym_in_expr (ivar, do_code->ext.iterator->end) + || gfc_find_sym_in_expr (ivar, do_code->ext.iterator->step)) + { + gfc_error ("!$ACC LOOP %s loops don't form rectangular iteration space at %L", + clause, &do_code->loc); + break; + } + do_code2 = do_code2->block->next; + } + if (j < i) /* The scan above stopped early, so an error was reported. */ + break; + } + if (i == collapse) + break; + for (c = do_code->next; c; c = c->next) + if (c->op != EXEC_NOP && c->op != EXEC_CONTINUE) + { + gfc_error ("%s !$ACC LOOP loops not perfectly nested at %L", + clause, &c->loc); + break; + } + if (c) + break; + do_code = do_code->block; + if (do_code->op != EXEC_DO && do_code->op != 
EXEC_DO_WHILE + && do_code->op != EXEC_DO_CONCURRENT) + { + gfc_error ("not enough DO loops for %s !$ACC LOOP at %L", + clause, &code->loc); + break; + } + do_code = do_code->next; + if (do_code == NULL + || (do_code->op != EXEC_DO && do_code->op != EXEC_DO_WHILE + && do_code->op != EXEC_DO_CONCURRENT)) + { + gfc_error ("not enough DO loops for %s !$ACC LOOP at %L", + clause, &code->loc); + break; + } + } +} + + +static void +resolve_oacc_params_in_parallel (gfc_code *code, const char *clause) +{ + fortran_omp_context *c; + + if (oacc_is_parallel (code)) + gfc_error ("!$ACC LOOP %s in PARALLEL region doesn't allow " + "non-static arguments at %L", clause, &code->loc); + for (c = omp_current_ctx; c; c = c->previous) + { + if (oacc_is_loop (c->code)) + break; + if (oacc_is_parallel (c->code)) + gfc_error ("!$ACC LOOP %s in PARALLEL region doesn't allow " + "non-static arguments at %L", clause, &code->loc); + } +} + + +static void +resolve_oacc_loop_blocks (gfc_code *code) +{ + fortran_omp_context *c; + + if (!oacc_is_loop (code)) + return; + + if (code->op == EXEC_OACC_LOOP) + for (c = omp_current_ctx; c; c = c->previous) + { + if (oacc_is_loop (c->code)) + { + if (code->ext.omp_clauses->gang) + { + if (c->code->ext.omp_clauses->gang) + gfc_error ("Loop parallelized across gangs is not allowed " + "inside another loop parallelized across gangs at %L", + &code->loc); + if (c->code->ext.omp_clauses->worker) + gfc_error ("Loop parallelized across gangs is not allowed " + "inside loop parallelized across workers at %L", + &code->loc); + if (c->code->ext.omp_clauses->vector) + gfc_error ("Loop parallelized across gangs is not allowed " + "inside loop parallelized across vectors at %L", + &code->loc); + } + if (code->ext.omp_clauses->worker) + { + if (c->code->ext.omp_clauses->worker) + gfc_error ("Loop parallelized across workers is not allowed " + "inside another loop parallelized across workers at %L", + &code->loc); + if (c->code->ext.omp_clauses->vector) + gfc_error 
("Loop parallelized across workers is not allowed " + "inside another loop parallelized across vectors at %L", + &code->loc); + } + if (code->ext.omp_clauses->vector) + if (c->code->ext.omp_clauses->vector) + gfc_error ("Loop parallelized across vectors is not allowed " + "inside another loop parallelized across vectors at %L", + &code->loc); + } + + if (oacc_is_parallel (c->code) || oacc_is_kernels (c->code)) + break; + } + + if (code->ext.omp_clauses->seq) + { + if (code->ext.omp_clauses->independent) + gfc_error ("Clause SEQ conflicts with INDEPENDENT at %L", &code->loc); + if (code->ext.omp_clauses->gang) + gfc_error ("Clause SEQ conflicts with GANG at %L", &code->loc); + if (code->ext.omp_clauses->worker) + gfc_error ("Clause SEQ conflicts with WORKER at %L", &code->loc); + if (code->ext.omp_clauses->vector) + gfc_error ("Clause SEQ conflicts with VECTOR at %L", &code->loc); + if (code->ext.omp_clauses->par_auto) + gfc_error ("Clause SEQ conflicts with AUTO at %L", &code->loc); + } + if (code->ext.omp_clauses->par_auto) + { + if (code->ext.omp_clauses->gang) + gfc_error ("Clause AUTO conflicts with GANG at %L", &code->loc); + if (code->ext.omp_clauses->worker) + gfc_error ("Clause AUTO conflicts with WORKER at %L", &code->loc); + if (code->ext.omp_clauses->vector) + gfc_error ("Clause AUTO conflicts with VECTOR at %L", &code->loc); + } + if (!code->ext.omp_clauses->tile_list) + { + if (code->ext.omp_clauses->gang) + { + if (code->ext.omp_clauses->worker) + gfc_error ("Clause GANG conflicts with WORKER at %L", &code->loc); + if (code->ext.omp_clauses->vector) + gfc_error ("Clause GANG conflicts with VECTOR at %L", &code->loc); + } + if (code->ext.omp_clauses->worker) + if (code->ext.omp_clauses->vector) + gfc_error ("Clause WORKER conflicts with VECTOR at %L", &code->loc); + } + else if (code->ext.omp_clauses->gang + && code->ext.omp_clauses->worker + && code->ext.omp_clauses->vector) + gfc_error ("Tiled loop cannot be parallelized across gangs, workers and " + 
"vectors at the same time at %L", &code->loc); + + if (code->ext.omp_clauses->gang + && code->ext.omp_clauses->gang_expr + && !code->ext.omp_clauses->gang_static) + resolve_oacc_params_in_parallel (code, "GANG"); + + if (code->ext.omp_clauses->worker + && code->ext.omp_clauses->worker_expr) + resolve_oacc_params_in_parallel (code, "WORKER"); + + if (code->ext.omp_clauses->tile_list) + { + gfc_expr_list *el; + int num = 0; + for (el = code->ext.omp_clauses->tile_list; el; el = el->next) + { + num++; + if (el->expr == NULL) + continue; + resolve_oacc_positive_int_expr (el->expr, "TILE"); + if (el->expr->expr_type != EXPR_CONSTANT) + gfc_error ("TILE requires constant expression at %L", &code->loc); + } + resolve_oacc_nested_loops (code, code->block->next, num, "tiled"); + } +} + + +void +gfc_resolve_oacc_blocks (gfc_code *code, gfc_namespace *ns) +{ + fortran_omp_context ctx; + + resolve_oacc_loop_blocks (code); + + ctx.code = code; + ctx.sharing_clauses = NULL; + ctx.private_iterators = new hash_set; + ctx.previous = omp_current_ctx; + ctx.is_openmp = false; + omp_current_ctx = &ctx; + + gfc_resolve_blocks (code->block, ns); + + omp_current_ctx = ctx.previous; + delete ctx.private_iterators; +} + + +static void +resolve_oacc_loop (gfc_code *code) +{ + gfc_code *do_code; + int collapse; + + if (code->ext.omp_clauses) + resolve_omp_clauses (code, &code->loc, code->ext.omp_clauses, NULL, true); + + do_code = code->block->next; + collapse = code->ext.omp_clauses->collapse; + + if (collapse <= 0) + collapse = 1; + resolve_oacc_nested_loops (code, do_code, collapse, "collapsed"); +} + + +static void +resolve_oacc_cache (gfc_code *code ATTRIBUTE_UNUSED) +{ + sorry ("Sorry, !$ACC cache unimplemented yet"); +} + + +void +gfc_resolve_oacc_declare (gfc_namespace *ns) +{ + int list; + gfc_omp_namelist *n; + locus loc; + + if (ns->oacc_declare_clauses == NULL) + return; + + loc = ns->oacc_declare_clauses->loc; + + for (list = OMP_LIST_DEVICE_RESIDENT; + list <= 
OMP_LIST_DEVICE_RESIDENT; list++) + for (n = ns->oacc_declare_clauses->lists[list]; n; n = n->next) + { + n->sym->mark = 0; + if (n->sym->attr.flavor == FL_PARAMETER) + gfc_error ("PARAMETER object '%s' is not allowed at %L", n->sym->name, &loc); + } + + for (list = OMP_LIST_DEVICE_RESIDENT; + list <= OMP_LIST_DEVICE_RESIDENT; list++) + for (n = ns->oacc_declare_clauses->lists[list]; n; n = n->next) + { + if (n->sym->mark) + gfc_error ("Symbol '%s' present on multiple clauses at %L", + n->sym->name, &loc); + else + n->sym->mark = 1; + } + + for (n = ns->oacc_declare_clauses->lists[OMP_LIST_DEVICE_RESIDENT]; n; + n = n->next) + check_array_not_assumed (n->sym, loc, "DEVICE_RESIDENT"); +} + + +void +gfc_resolve_oacc_directive (gfc_code *code, gfc_namespace *ns ATTRIBUTE_UNUSED) +{ + resolve_oacc_directive_inside_omp_region (code); + + switch (code->op) + { + case EXEC_OACC_PARALLEL: + case EXEC_OACC_KERNELS: + case EXEC_OACC_DATA: + case EXEC_OACC_HOST_DATA: + case EXEC_OACC_UPDATE: + case EXEC_OACC_ENTER_DATA: + case EXEC_OACC_EXIT_DATA: + case EXEC_OACC_WAIT: + resolve_omp_clauses (code, &code->loc, code->ext.omp_clauses, NULL, + true); + break; + case EXEC_OACC_PARALLEL_LOOP: + case EXEC_OACC_KERNELS_LOOP: + case EXEC_OACC_LOOP: + resolve_oacc_loop (code); + break; + case EXEC_OACC_CACHE: + resolve_oacc_cache (code); + break; + default: + break; + } +} + /* Resolve OpenMP directive clauses and check various requirements of each directive. 
*/ @@ -3347,6 +4682,8 @@ resolve_omp_do (gfc_code *code) void gfc_resolve_omp_directive (gfc_code *code, gfc_namespace *ns ATTRIBUTE_UNUSED) { + resolve_omp_directive_inside_oacc_region (code); + if (code->op != EXEC_OMP_ATOMIC) gfc_maybe_initialize_eh (); diff --git a/gcc/fortran/parse.c b/gcc/fortran/parse.c index 1c8294e318a..fd7d4ebc39f 100644 --- a/gcc/fortran/parse.c +++ b/gcc/fortran/parse.c @@ -584,6 +584,93 @@ decode_statement (void) undo_new_statement (); \ } while (0); +static gfc_statement +decode_oacc_directive (void) +{ + locus old_locus; + char c; + + gfc_enforce_clean_symbol_state (); + + gfc_clear_error (); /* Clear any pending errors. */ + gfc_clear_warning (); /* Clear any pending warnings. */ + + if (gfc_pure (NULL)) + { + gfc_error_now ("OpenACC directives at %C may not appear in PURE " + "procedures"); + gfc_error_recovery (); + return ST_NONE; + } + + gfc_unset_implicit_pure (NULL); + + old_locus = gfc_current_locus; + + /* General OpenACC directive matching: Instead of testing every possible + statement, we eliminate most possibilities by peeking at the + first character. 
*/ + + c = gfc_peek_ascii_char (); + + switch (c) + { + case 'c': + match ("cache", gfc_match_oacc_cache, ST_OACC_CACHE); + break; + case 'd': + match ("data", gfc_match_oacc_data, ST_OACC_DATA); + match ("declare", gfc_match_oacc_declare, ST_OACC_DECLARE); + break; + case 'e': + match ("end data", gfc_match_omp_eos, ST_OACC_END_DATA); + match ("end host_data", gfc_match_omp_eos, ST_OACC_END_HOST_DATA); + match ("end kernels loop", gfc_match_omp_eos, ST_OACC_END_KERNELS_LOOP); + match ("end kernels", gfc_match_omp_eos, ST_OACC_END_KERNELS); + match ("end loop", gfc_match_omp_eos, ST_OACC_END_LOOP); + match ("end parallel loop", gfc_match_omp_eos, ST_OACC_END_PARALLEL_LOOP); + match ("end parallel", gfc_match_omp_eos, ST_OACC_END_PARALLEL); + match ("enter data", gfc_match_oacc_enter_data, ST_OACC_ENTER_DATA); + match ("exit data", gfc_match_oacc_exit_data, ST_OACC_EXIT_DATA); + break; + case 'h': + match ("host_data", gfc_match_oacc_host_data, ST_OACC_HOST_DATA); + break; + case 'p': + match ("parallel loop", gfc_match_oacc_parallel_loop, ST_OACC_PARALLEL_LOOP); + match ("parallel", gfc_match_oacc_parallel, ST_OACC_PARALLEL); + break; + case 'k': + match ("kernels loop", gfc_match_oacc_kernels_loop, ST_OACC_KERNELS_LOOP); + match ("kernels", gfc_match_oacc_kernels, ST_OACC_KERNELS); + break; + case 'l': + match ("loop", gfc_match_oacc_loop, ST_OACC_LOOP); + break; + case 'r': + match ("routine", gfc_match_oacc_routine, ST_OACC_ROUTINE); + break; + case 'u': + match ("update", gfc_match_oacc_update, ST_OACC_UPDATE); + break; + case 'w': + match ("wait", gfc_match_oacc_wait, ST_OACC_WAIT); + break; + } + + /* Directive not found or stored an error message. + Check and give up. 
*/ + + if (gfc_error_check () == 0) + gfc_error_now ("Unclassifiable OpenACC directive at %C"); + + reject_statement (); + + gfc_error_recovery (); + + return ST_NONE; +} + static gfc_statement decode_omp_directive (void) { @@ -811,6 +898,23 @@ decode_gcc_attribute (void) #undef match +/* Assert next length characters to be equal to token in free form. */ + +static void +verify_token_free (const char* token, int length, bool last_was_use_stmt) +{ + int i; + char c; + + c = gfc_next_ascii_char (); + for (i = 0; i < length; i++, c = gfc_next_ascii_char ()) + gcc_assert (c == token[i]); + + gcc_assert (gfc_is_whitespace(c)); + gfc_gobble_whitespace (); + if (last_was_use_stmt) + use_modules (); +} /* Get the next statement in free form source. */ @@ -880,7 +984,7 @@ next_free (void) else if (c == '!') { /* Comments have already been skipped by the time we get here, - except for GCC attributes and OpenMP directives. */ + except for GCC attributes and OpenMP/OpenACC directives. */ gfc_next_ascii_char (); /* Eat up the exclamation sign. 
*/ c = gfc_peek_ascii_char (); @@ -897,21 +1001,39 @@ next_free (void) return decode_gcc_attribute (); } - else if (c == '$' && (flag_openmp || flag_openmp_simd)) + else if (c == '$') { - int i; + /* Since both OpenMP and OpenACC directives start with the + !$ character sequence, we must check all flag combinations. */ + if ((flag_openmp || flag_openmp_simd) + && !flag_openacc) + { + verify_token_free ("$omp", 4, last_was_use_stmt); + return decode_omp_directive (); + } + else if ((flag_openmp || flag_openmp_simd) + && flag_openacc) + { + gfc_next_ascii_char (); /* Eat up dollar character */ + c = gfc_peek_ascii_char (); - c = gfc_next_ascii_char (); - for (i = 0; i < 4; i++, c = gfc_next_ascii_char ()) - gcc_assert (c == "$omp"[i]); - - gcc_assert (c == ' ' || c == '\t'); - gfc_gobble_whitespace (); - if (last_was_use_stmt) - use_modules (); - return decode_omp_directive (); + if (c == 'o') + { + verify_token_free ("omp", 3, last_was_use_stmt); + return decode_omp_directive (); + } + else if (c == 'a') + { + verify_token_free ("acc", 3, last_was_use_stmt); + return decode_oacc_directive (); + } + } + else if (flag_openacc) + { + verify_token_free ("$acc", 4, last_was_use_stmt); + return decode_oacc_directive (); + } } - gcc_unreachable (); } @@ -927,6 +1049,28 @@ next_free (void) return decode_statement (); } +/* Assert next length characters to be equal to token in fixed form. */ + +static bool +verify_token_fixed (const char *token, int length, bool last_was_use_stmt) +{ + int i; + char c = gfc_next_char_literal (NONSTRING); + + for (i = 0; i < length; i++, c = gfc_next_char_literal (NONSTRING)) + gcc_assert ((char) gfc_wide_tolower (c) == token[i]); + + if (c != ' ' && c != '0') + { + gfc_buffer_error (false); + gfc_error ("Bad continuation line at %C"); + return false; + } + if (last_was_use_stmt) + use_modules (); + + return true; +} /* Get the next statement in fixed-form source. 
*/ @@ -986,21 +1130,38 @@ next_fixed (void) return decode_gcc_attribute (); } - else if (c == '$' - && (flag_openmp || flag_openmp_simd)) + else if (c == '$') { - for (i = 0; i < 4; i++, c = gfc_next_char_literal (NONSTRING)) - gcc_assert ((char) gfc_wide_tolower (c) == "$omp"[i]); - - if (c != ' ' && c != '0') + if ((flag_openmp || flag_openmp_simd) + && !flag_openacc) { - gfc_buffer_error (false); - gfc_error ("Bad continuation line at %C"); - return ST_NONE; + if (!verify_token_fixed ("omp", 3, last_was_use_stmt)) + return ST_NONE; + return decode_omp_directive (); + } + else if ((flag_openmp || flag_openmp_simd) + && flag_openacc) + { + c = gfc_next_char_literal(NONSTRING); + if (c == 'o' || c == 'O') + { + if (!verify_token_fixed ("mp", 2, last_was_use_stmt)) + return ST_NONE; + return decode_omp_directive (); + } + else if (c == 'a' || c == 'A') + { + if (!verify_token_fixed ("cc", 2, last_was_use_stmt)) + return ST_NONE; + return decode_oacc_directive (); + } + } + else if (flag_openacc) + { + if (!verify_token_fixed ("acc", 3, last_was_use_stmt)) + return ST_NONE; + return decode_oacc_directive (); } - if (last_was_use_stmt) - use_modules (); - return decode_omp_directive (); } /* FALLTHROUGH */ @@ -1161,7 +1322,9 @@ next_statement (void) case ST_OMP_BARRIER: case ST_OMP_TASKWAIT: case ST_OMP_TASKYIELD: \ case ST_OMP_CANCEL: case ST_OMP_CANCELLATION_POINT: \ case ST_OMP_TARGET_UPDATE: case ST_ERROR_STOP: case ST_SYNC_ALL: \ - case ST_SYNC_IMAGES: case ST_SYNC_MEMORY: case ST_LOCK: case ST_UNLOCK + case ST_SYNC_IMAGES: case ST_SYNC_MEMORY: case ST_LOCK: case ST_UNLOCK: \ + case ST_OACC_UPDATE: case ST_OACC_WAIT: case ST_OACC_CACHE: \ + case ST_OACC_ENTER_DATA: case ST_OACC_EXIT_DATA /* Statements that mark other executable statements. 
*/ @@ -1186,7 +1349,9 @@ next_statement (void) case ST_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD: case ST_OMP_DISTRIBUTE: \ case ST_OMP_DISTRIBUTE_SIMD: case ST_OMP_DISTRIBUTE_PARALLEL_DO: \ case ST_OMP_DISTRIBUTE_PARALLEL_DO_SIMD: \ - case ST_CRITICAL + case ST_CRITICAL: \ + case ST_OACC_PARALLEL_LOOP: case ST_OACC_PARALLEL: case ST_OACC_KERNELS: \ + case ST_OACC_DATA: case ST_OACC_HOST_DATA: case ST_OACC_LOOP: case ST_OACC_KERNELS_LOOP /* Declaration statements */ @@ -1194,7 +1359,7 @@ next_statement (void) case ST_EQUIVALENCE: case ST_NAMELIST: case ST_STATEMENT_FUNCTION: \ case ST_TYPE: case ST_INTERFACE: case ST_OMP_THREADPRIVATE: \ case ST_PROCEDURE: case ST_OMP_DECLARE_SIMD: case ST_OMP_DECLARE_REDUCTION: \ - case ST_OMP_DECLARE_TARGET + case ST_OMP_DECLARE_TARGET: case ST_OACC_ROUTINE /* Block end statements. Errors associated with interchanging these are detected in gfc_match_end(). */ @@ -1214,6 +1379,8 @@ push_state (gfc_state_data *p, gfc_compile_state new_state, gfc_symbol *sym) p->sym = sym; p->head = p->tail = NULL; p->do_variable = NULL; + if (p->state != COMP_DO && p->state != COMP_DO_CONCURRENT) + p->ext.oacc_declare_clauses = NULL; /* If this the state of a construct like BLOCK, DO or IF, the corresponding construct statement was accepted right before pushing the state. 
Thus, @@ -1679,6 +1846,69 @@ gfc_ascii_statement (gfc_statement st) case ST_END_ENUM: p = "END ENUM"; break; + case ST_OACC_PARALLEL_LOOP: + p = "!$ACC PARALLEL LOOP"; + break; + case ST_OACC_END_PARALLEL_LOOP: + p = "!$ACC END PARALLEL LOOP"; + break; + case ST_OACC_PARALLEL: + p = "!$ACC PARALLEL"; + break; + case ST_OACC_END_PARALLEL: + p = "!$ACC END PARALLEL"; + break; + case ST_OACC_KERNELS: + p = "!$ACC KERNELS"; + break; + case ST_OACC_END_KERNELS: + p = "!$ACC END KERNELS"; + break; + case ST_OACC_KERNELS_LOOP: + p = "!$ACC KERNELS LOOP"; + break; + case ST_OACC_END_KERNELS_LOOP: + p = "!$ACC END KERNELS LOOP"; + break; + case ST_OACC_DATA: + p = "!$ACC DATA"; + break; + case ST_OACC_END_DATA: + p = "!$ACC END DATA"; + break; + case ST_OACC_HOST_DATA: + p = "!$ACC HOST_DATA"; + break; + case ST_OACC_END_HOST_DATA: + p = "!$ACC END HOST_DATA"; + break; + case ST_OACC_LOOP: + p = "!$ACC LOOP"; + break; + case ST_OACC_END_LOOP: + p = "!$ACC END LOOP"; + break; + case ST_OACC_DECLARE: + p = "!$ACC DECLARE"; + break; + case ST_OACC_UPDATE: + p = "!$ACC UPDATE"; + break; + case ST_OACC_WAIT: + p = "!$ACC WAIT"; + break; + case ST_OACC_CACHE: + p = "!$ACC CACHE"; + break; + case ST_OACC_ENTER_DATA: + p = "!$ACC ENTER DATA"; + break; + case ST_OACC_EXIT_DATA: + p = "!$ACC EXIT DATA"; + break; + case ST_OACC_ROUTINE: + p = "!$ACC ROUTINE"; + break; case ST_OMP_ATOMIC: p = "!$OMP ATOMIC"; break; @@ -2180,6 +2410,7 @@ verify_st_order (st_state *p, gfc_statement st, bool silent) case ST_PUBLIC: case ST_PRIVATE: case ST_DERIVED_DECL: + case ST_OACC_DECLARE: case_decl: if (p->state >= ORDER_EXEC) goto order; @@ -3081,6 +3312,19 @@ declSt: st = next_statement (); goto loop; + case ST_OACC_DECLARE: + if (!verify_st_order(&ss, st, false)) + { + reject_statement (); + st = next_statement (); + goto loop; + } + if (gfc_state_stack->ext.oacc_declare_clauses == NULL) + gfc_state_stack->ext.oacc_declare_clauses = new_st.ext.omp_clauses; + accept_statement (st); + st = 
next_statement (); + goto loop; + default: break; } @@ -3571,9 +3815,15 @@ static void parse_critical_block (void) { gfc_code *top, *d; - gfc_state_data s; + gfc_state_data s, *sd; gfc_statement st; + for (sd = gfc_state_stack; sd; sd = sd->previous) + if (sd->state == COMP_OMP_STRUCTURED_BLOCK) + gfc_error_now (is_oacc (sd) + ? "CRITICAL block inside of OpenACC region at %C" + : "CRITICAL block inside of OpenMP region at %C"); + s.ext.end_do_label = new_st.label1; accept_statement (ST_CRITICAL); @@ -3988,6 +4238,128 @@ parse_omp_atomic (void) } +/* Parse the statements of an OpenACC structured block. */ + +static void +parse_oacc_structured_block (gfc_statement acc_st) +{ + gfc_statement st, acc_end_st; + gfc_code *cp, *np; + gfc_state_data s, *sd; + + for (sd = gfc_state_stack; sd; sd = sd->previous) + if (sd->state == COMP_CRITICAL) + gfc_error_now ("OpenACC directive inside of CRITICAL block at %C"); + + accept_statement (acc_st); + + cp = gfc_state_stack->tail; + push_state (&s, COMP_OMP_STRUCTURED_BLOCK, NULL); + np = new_level (cp); + np->op = cp->op; + np->block = NULL; + switch (acc_st) + { + case ST_OACC_PARALLEL: + acc_end_st = ST_OACC_END_PARALLEL; + break; + case ST_OACC_KERNELS: + acc_end_st = ST_OACC_END_KERNELS; + break; + case ST_OACC_DATA: + acc_end_st = ST_OACC_END_DATA; + break; + case ST_OACC_HOST_DATA: + acc_end_st = ST_OACC_END_HOST_DATA; + break; + default: + gcc_unreachable (); + } + + do + { + st = parse_executable (ST_NONE); + if (st == ST_NONE) + unexpected_eof (); + else if (st != acc_end_st) + gfc_error ("Expecting %s at %C", gfc_ascii_statement (acc_end_st)); + reject_statement (); + } + while (st != acc_end_st); + + gcc_assert (new_st.op == EXEC_NOP); + + gfc_clear_new_st (); + gfc_commit_symbols (); + gfc_warning_check (); + pop_state (); +} + +/* Parse the statements of OpenACC loop/parallel loop/kernels loop. 
*/ + +static gfc_statement +parse_oacc_loop (gfc_statement acc_st) +{ + gfc_statement st; + gfc_code *cp, *np; + gfc_state_data s, *sd; + + for (sd = gfc_state_stack; sd; sd = sd->previous) + if (sd->state == COMP_CRITICAL) + gfc_error_now ("OpenACC directive inside of CRITICAL block at %C"); + + accept_statement (acc_st); + + cp = gfc_state_stack->tail; + push_state (&s, COMP_OMP_STRUCTURED_BLOCK, NULL); + np = new_level (cp); + np->op = cp->op; + np->block = NULL; + + for (;;) + { + st = next_statement (); + if (st == ST_NONE) + unexpected_eof (); + else if (st == ST_DO) + break; + else + { + gfc_error ("Expected DO loop at %C"); + reject_statement (); + } + } + + parse_do_block (); + if (gfc_statement_label != NULL + && gfc_state_stack->previous != NULL + && gfc_state_stack->previous->state == COMP_DO + && gfc_state_stack->previous->ext.end_do_label == gfc_statement_label) + { + pop_state (); + return ST_IMPLIED_ENDDO; + } + + check_do_closure (); + pop_state (); + + st = next_statement (); + if (st == ST_OACC_END_LOOP) + gfc_warning ("Redundant !$ACC END LOOP at %C"); + if ((acc_st == ST_OACC_PARALLEL_LOOP && st == ST_OACC_END_PARALLEL_LOOP) || + (acc_st == ST_OACC_KERNELS_LOOP && st == ST_OACC_END_KERNELS_LOOP) || + (acc_st == ST_OACC_LOOP && st == ST_OACC_END_LOOP)) + { + gcc_assert (new_st.op == EXEC_NOP); + gfc_clear_new_st (); + gfc_commit_symbols (); + gfc_warning_check (); + st = next_statement (); + } + return st; +} + + /* Parse the statements of an OpenMP structured block. 
*/ static void @@ -4307,6 +4679,21 @@ parse_executable (gfc_statement st) parse_forall_block (); break; + case ST_OACC_PARALLEL_LOOP: + case ST_OACC_KERNELS_LOOP: + case ST_OACC_LOOP: + st = parse_oacc_loop (st); + if (st == ST_IMPLIED_ENDDO) + return st; + continue; + + case ST_OACC_PARALLEL: + case ST_OACC_KERNELS: + case ST_OACC_DATA: + case ST_OACC_HOST_DATA: + parse_oacc_structured_block (st); + break; + case ST_OMP_PARALLEL: case ST_OMP_PARALLEL_SECTIONS: case ST_OMP_SECTIONS: @@ -4637,6 +5024,13 @@ contains: done: gfc_current_ns->code = gfc_state_stack->head; + if (gfc_state_stack->state == COMP_PROGRAM + || gfc_state_stack->state == COMP_MODULE + || gfc_state_stack->state == COMP_SUBROUTINE + || gfc_state_stack->state == COMP_FUNCTION + || gfc_state_stack->state == COMP_BLOCK) + gfc_current_ns->oacc_declare_clauses + = gfc_state_stack->ext.oacc_declare_clauses; } @@ -5155,3 +5549,28 @@ duplicate_main: gfc_done_2 (); return true; } + +/* Return true if this state data represents an OpenACC region. 
*/ +bool +is_oacc (gfc_state_data *sd) +{ + switch (sd->construct->op) + { + case EXEC_OACC_PARALLEL_LOOP: + case EXEC_OACC_PARALLEL: + case EXEC_OACC_KERNELS_LOOP: + case EXEC_OACC_KERNELS: + case EXEC_OACC_DATA: + case EXEC_OACC_HOST_DATA: + case EXEC_OACC_LOOP: + case EXEC_OACC_UPDATE: + case EXEC_OACC_WAIT: + case EXEC_OACC_CACHE: + case EXEC_OACC_ENTER_DATA: + case EXEC_OACC_EXIT_DATA: + return true; + + default: + return false; + } +} diff --git a/gcc/fortran/parse.h b/gcc/fortran/parse.h index d6b45fb3b84..8a1613f5322 100644 --- a/gcc/fortran/parse.h +++ b/gcc/fortran/parse.h @@ -49,6 +49,7 @@ typedef struct gfc_state_data union { gfc_st_label *end_do_label; + gfc_omp_clauses *oacc_declare_clauses; } ext; } @@ -68,4 +69,5 @@ match gfc_match_enumerator_def (void); void gfc_free_enum_history (void); extern bool gfc_matching_function; match gfc_match_prefix (gfc_typespec *); +bool is_oacc (gfc_state_data *); #endif /* GFC_PARSE_H */ diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c index 52734e08bb9..88f35ffb065 100644 --- a/gcc/fortran/resolve.c +++ b/gcc/fortran/resolve.c @@ -9126,6 +9126,18 @@ gfc_resolve_blocks (gfc_code *b, gfc_namespace *ns) case EXEC_WAIT: break; + case EXEC_OACC_PARALLEL_LOOP: + case EXEC_OACC_PARALLEL: + case EXEC_OACC_KERNELS_LOOP: + case EXEC_OACC_KERNELS: + case EXEC_OACC_DATA: + case EXEC_OACC_HOST_DATA: + case EXEC_OACC_LOOP: + case EXEC_OACC_UPDATE: + case EXEC_OACC_WAIT: + case EXEC_OACC_CACHE: + case EXEC_OACC_ENTER_DATA: + case EXEC_OACC_EXIT_DATA: case EXEC_OMP_ATOMIC: case EXEC_OMP_CRITICAL: case EXEC_OMP_DISTRIBUTE: @@ -9941,6 +9953,15 @@ gfc_resolve_code (gfc_code *code, gfc_namespace *ns) omp_workshare_save = -1; switch (code->op) { + case EXEC_OACC_PARALLEL_LOOP: + case EXEC_OACC_PARALLEL: + case EXEC_OACC_KERNELS_LOOP: + case EXEC_OACC_KERNELS: + case EXEC_OACC_DATA: + case EXEC_OACC_HOST_DATA: + case EXEC_OACC_LOOP: + gfc_resolve_oacc_blocks (code, ns); + break; case EXEC_OMP_PARALLEL_WORKSHARE: 
omp_workshare_save = omp_workshare_flag; omp_workshare_flag = 1; @@ -10293,6 +10314,21 @@ gfc_resolve_code (gfc_code *code, gfc_namespace *ns) "expression", &code->expr1->where); break; + case EXEC_OACC_PARALLEL_LOOP: + case EXEC_OACC_PARALLEL: + case EXEC_OACC_KERNELS_LOOP: + case EXEC_OACC_KERNELS: + case EXEC_OACC_DATA: + case EXEC_OACC_HOST_DATA: + case EXEC_OACC_LOOP: + case EXEC_OACC_UPDATE: + case EXEC_OACC_WAIT: + case EXEC_OACC_CACHE: + case EXEC_OACC_ENTER_DATA: + case EXEC_OACC_EXIT_DATA: + gfc_resolve_oacc_directive (code, ns); + break; + case EXEC_OMP_ATOMIC: case EXEC_OMP_BARRIER: case EXEC_OMP_CANCEL: @@ -14931,6 +14967,7 @@ resolve_codes (gfc_namespace *ns) old_obstack = labels_obstack; bitmap_obstack_initialize (&labels_obstack); + gfc_resolve_oacc_declare (ns); gfc_resolve_code (ns->code, ns); bitmap_obstack_release (&labels_obstack); diff --git a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c index f804060a26d..4a71cb20dcf 100644 --- a/gcc/fortran/scanner.c +++ b/gcc/fortran/scanner.c @@ -55,9 +55,12 @@ gfc_directorylist *include_dirs, *intrinsic_modules_dirs; static gfc_file *file_head, *current_file; -static int continue_flag, end_flag, openmp_flag, gcc_attribute_flag; +static int continue_flag, end_flag, gcc_attribute_flag; +/* If !$omp/!$acc occurred in current comment line. */ +static int openmp_flag, openacc_flag; static int continue_count, continue_line; static locus openmp_locus; +static locus openacc_locus; static locus gcc_attribute_locus; gfc_source_form gfc_current_form; @@ -707,11 +710,89 @@ skip_gcc_attribute (locus start) return r; } +/* Return true if CC was matched. 
*/ +static bool +skip_oacc_attribute (locus start, locus old_loc, bool continue_flag) +{ + bool r = false; + char c; + if ((c = next_char ()) == 'c' || c == 'C') + if ((c = next_char ()) == 'c' || c == 'C') + r = true; + + if (r) + { + if ((c = next_char ()) == ' ' || c == '\t' + || continue_flag) + { + while (gfc_is_whitespace (c)) + c = next_char (); + if (c != '\n' && c != '!') + { + openacc_flag = 1; + openacc_locus = old_loc; + gfc_current_locus = start; + } + else + r = false; + } + else + { + gfc_warning_now ("!$ACC at %C starts a commented " + "line as it neither is followed " + "by a space nor is a " + "continuation line"); + r = false; + } + } + + return r; +} + +/* Return true if MP was matched. */ +static bool +skip_omp_attribute (locus start, locus old_loc, bool continue_flag) +{ + bool r = false; + char c; + + if ((c = next_char ()) == 'm' || c == 'M') + if ((c = next_char ()) == 'p' || c == 'P') + r = true; + + if (r) + { + if ((c = next_char ()) == ' ' || c == '\t' + || continue_flag) + { + while (gfc_is_whitespace (c)) + c = next_char (); + if (c != '\n' && c != '!') + { + openmp_flag = 1; + openmp_locus = old_loc; + gfc_current_locus = start; + } + else + r = false; + } + else + { + gfc_warning_now ("!$OMP at %C starts a commented " + "line as it neither is followed " + "by a space nor is a " + "continuation line"); + r = false; + } + } + + return r; +} /* Comment lines are null lines, lines containing only blanks or lines on which the first nonblank line is a '!'. - Return true if !$ openmp conditional compilation sentinel was + Return true if !$ openmp or openacc conditional compilation sentinel was seen. 
*/ static bool @@ -744,55 +825,98 @@ skip_free_comments (void) if (at_bol && skip_gcc_attribute (start)) return false; - /* If -fopenmp, we need to handle here 2 things: - 1) don't treat !$omp as comments, but directives - 2) handle OpenMP conditional compilation, where + /* If -fopenmp/-fopenacc, we need to handle here 2 things: + 1) don't treat !$omp/!$acc as comments, but directives + 2) handle OpenMP/OpenACC conditional compilation, where !$ should be treated as 2 spaces (for initial lines only if followed by space). */ - if ((flag_openmp || flag_openmp_simd) && at_bol) - { - locus old_loc = gfc_current_locus; - if (next_char () == '$') - { - c = next_char (); - if (c == 'o' || c == 'O') - { - if (((c = next_char ()) == 'm' || c == 'M') - && ((c = next_char ()) == 'p' || c == 'P')) + if (at_bol) + { + if ((flag_openmp || flag_openmp_simd) + && flag_openacc) + { + locus old_loc = gfc_current_locus; + if (next_char () == '$') + { + c = next_char (); + if (c == 'o' || c == 'O') + { + if (skip_omp_attribute (start, old_loc, continue_flag)) + return false; + gfc_current_locus = old_loc; + next_char (); + c = next_char (); + } + else if (c == 'a' || c == 'A') + { + if (skip_oacc_attribute (start, old_loc, continue_flag)) + return false; + gfc_current_locus = old_loc; + next_char (); + c = next_char (); + } + if (continue_flag || c == ' ' || c == '\t') + { + gfc_current_locus = old_loc; + next_char (); + openmp_flag = openacc_flag = 0; + return true; + } + } + gfc_current_locus = old_loc; + } + else if ((flag_openmp || flag_openmp_simd) + && !flag_openacc) + { + locus old_loc = gfc_current_locus; + if (next_char () == '$') + { + c = next_char (); + if (c == 'o' || c == 'O') + { + if (skip_omp_attribute (start, old_loc, continue_flag)) + return false; + gfc_current_locus = old_loc; + next_char (); + c = next_char (); + } + if (continue_flag || c == ' ' || c == '\t') + { + gfc_current_locus = old_loc; + next_char (); + openmp_flag = 0; + return true; + } + } + 
gfc_current_locus = old_loc; + } + else if (flag_openacc + && !(flag_openmp || flag_openmp_simd)) + { + locus old_loc = gfc_current_locus; + if (next_char () == '$') + { + c = next_char (); + if (c == 'a' || c == 'A') { - if ((c = next_char ()) == ' ' || c == '\t' - || continue_flag) - { - while (gfc_is_whitespace (c)) - c = next_char (); - if (c != '\n' && c != '!') - { - openmp_flag = 1; - openmp_locus = old_loc; - gfc_current_locus = start; - return false; - } - } - else - gfc_warning_now ("!$OMP at %C starts a commented " - "line as it neither is followed " - "by a space nor is a " - "continuation line"); + if (skip_oacc_attribute (start, old_loc, + continue_flag)) + return false; + gfc_current_locus = old_loc; + next_char(); + c = next_char(); } - gfc_current_locus = old_loc; - next_char (); - c = next_char (); - } - if (continue_flag || c == ' ' || c == '\t') - { - gfc_current_locus = old_loc; - next_char (); - openmp_flag = 0; - return true; - } - } - gfc_current_locus = old_loc; - } + if (continue_flag || c == ' ' || c == '\t') + { + gfc_current_locus = old_loc; + next_char(); + openacc_flag = 0; + return true; + } + } + gfc_current_locus = old_loc; + } + } skip_comment_line (); continue; } @@ -803,6 +927,9 @@ skip_free_comments (void) if (openmp_flag && at_bol) openmp_flag = 0; + if (openacc_flag && at_bol) + openacc_flag = 0; + gcc_attribute_flag = 0; gfc_current_locus = start; return false; @@ -865,9 +992,10 @@ skip_fixed_comments (void) return; } - /* If -fopenmp, we need to handle here 2 things: - 1) don't treat !$omp|c$omp|*$omp as comments, but directives - 2) handle OpenMP conditional compilation, where + /* If -fopenmp/-fopenacc, we need to handle here 2 things: + 1) don't treat !$omp/!$acc|c$omp/c$acc|*$omp / *$acc as comments, + but directives + 2) handle OpenMP/OpenACC conditional compilation, where !$|c$|*$ should be treated as 2 spaces if the characters in columns 3 to 6 are valid fixed form label columns characters. 
*/ @@ -934,6 +1062,67 @@ skip_fixed_comments (void) } gfc_current_locus = start; } + + if (flag_openacc) + { + if (next_char () == '$') + { + c = next_char (); + if (c == 'a' || c == 'A') + { + if (((c = next_char ()) == 'c' || c == 'C') + && ((c = next_char ()) == 'c' || c == 'C')) + { + c = next_char (); + if (c != '\n' + && ((openacc_flag && continue_flag) + || c == ' ' || c == '\t' || c == '0')) + { + do + c = next_char (); + while (gfc_is_whitespace (c)); + if (c != '\n' && c != '!') + { + /* Canonicalize to *$acc. */ + *start.nextc = '*'; + openacc_flag = 1; + gfc_current_locus = start; + return; + } + } + } + } + else + { + int digit_seen = 0; + + for (col = 3; col < 6; col++, c = next_char ()) + if (c == ' ') + continue; + else if (c == '\t') + { + col = 6; + break; + } + else if (c < '0' || c > '9') + break; + else + digit_seen = 1; + + if (col == 6 && c != '\n' + && ((continue_flag && !digit_seen) + || c == ' ' || c == '\t' || c == '0')) + { + gfc_current_locus = start; + start.nextc[0] = ' '; + start.nextc[1] = ' '; + continue; + } + } + } + gfc_current_locus = start; + } + skip_comment_line (); continue; } @@ -976,6 +1165,7 @@ skip_fixed_comments (void) } openmp_flag = 0; + openacc_flag = 0; gcc_attribute_flag = 0; gfc_current_locus = start; } @@ -1004,10 +1194,11 @@ gfc_char_t gfc_next_char_literal (gfc_instring in_string) { locus old_loc; - int i, prev_openmp_flag; + int i, prev_openmp_flag, prev_openacc_flag; gfc_char_t c; continue_flag = 0; + prev_openacc_flag = prev_openmp_flag = 0; restart: c = next_char (); @@ -1033,6 +1224,11 @@ restart: sizeof (gfc_current_locus)) == 0) goto done; + if (openacc_flag + && memcmp (&gfc_current_locus, &openacc_locus, + sizeof (gfc_current_locus)) == 0) + goto done; + /* This line can't be continued */ do { @@ -1088,7 +1284,11 @@ restart: goto done; } - prev_openmp_flag = openmp_flag; + if (flag_openmp) + prev_openmp_flag = openmp_flag; + if (flag_openacc) + prev_openacc_flag = openacc_flag; + continue_flag = 1; if 
(c == '!') skip_comment_line (); @@ -1118,13 +1318,23 @@ restart: && continue_line < gfc_linebuf_linenum (gfc_current_locus.lb)) continue_line = gfc_linebuf_linenum (gfc_current_locus.lb); - if (prev_openmp_flag != openmp_flag) - { - gfc_current_locus = old_loc; - openmp_flag = prev_openmp_flag; - c = '&'; - goto done; - } + if (flag_openmp) + if (prev_openmp_flag != openmp_flag) + { + gfc_current_locus = old_loc; + openmp_flag = prev_openmp_flag; + c = '&'; + goto done; + } + + if (flag_openacc) + if (prev_openacc_flag != openacc_flag) + { + gfc_current_locus = old_loc; + openacc_flag = prev_openacc_flag; + c = '&'; + goto done; + } /* Now that we have a non-comment line, probe ahead for the first non-whitespace character. If it is another '&', then @@ -1148,6 +1358,17 @@ restart: while (gfc_is_whitespace (c)) c = next_char (); } + if (openacc_flag) + { + for (i = 0; i < 5; i++, c = next_char ()) + { + gcc_assert (gfc_wide_tolower (c) == (unsigned char) "!$acc"[i]); + if (i == 4) + old_loc = gfc_current_locus; + } + while (gfc_is_whitespace (c)) + c = next_char (); + } if (c != '&') { @@ -1161,7 +1382,7 @@ restart: } /* Both !$omp and !$ -fopenmp continuation lines have & on the continuation line only optionally. */ - else if (openmp_flag || openmp_cond_flag) + else if (openmp_flag || openacc_flag || openmp_cond_flag) gfc_current_locus.nextc--; else { @@ -1199,7 +1420,11 @@ restart: "Line truncated at %L", &gfc_current_locus); } - prev_openmp_flag = openmp_flag; + if (flag_openmp) + prev_openmp_flag = openmp_flag; + if (flag_openacc) + prev_openacc_flag = openacc_flag; + continue_flag = 1; old_loc = gfc_current_locus; @@ -1207,26 +1432,38 @@ restart: skip_fixed_comments (); /* See if this line is a continuation line. 
*/ - if (openmp_flag != prev_openmp_flag) + if (flag_openmp && openmp_flag != prev_openmp_flag) { openmp_flag = prev_openmp_flag; goto not_continuation; } + if (flag_openacc && openacc_flag != prev_openacc_flag) + { + openacc_flag = prev_openacc_flag; + goto not_continuation; + } - if (!openmp_flag) + if (!openmp_flag && !openacc_flag) for (i = 0; i < 5; i++) { c = next_char (); if (c != ' ') goto not_continuation; } - else + else if (openmp_flag) for (i = 0; i < 5; i++) { c = next_char (); if (gfc_wide_tolower (c) != (unsigned char) "*$omp"[i]) goto not_continuation; } + else if (openacc_flag) + for (i = 0; i < 5; i++) + { + c = next_char (); + if (gfc_wide_tolower (c) != (unsigned char) "*$acc"[i]) + goto not_continuation; + } c = next_char (); if (c == '0' || c == ' ' || c == '\n') diff --git a/gcc/fortran/st.c b/gcc/fortran/st.c index 7347e36a5eb..116af15d87e 100644 --- a/gcc/fortran/st.c +++ b/gcc/fortran/st.c @@ -185,6 +185,18 @@ gfc_free_statement (gfc_code *p) gfc_free_forall_iterator (p->ext.forall_iterator); break; + case EXEC_OACC_PARALLEL_LOOP: + case EXEC_OACC_PARALLEL: + case EXEC_OACC_KERNELS_LOOP: + case EXEC_OACC_KERNELS: + case EXEC_OACC_DATA: + case EXEC_OACC_HOST_DATA: + case EXEC_OACC_LOOP: + case EXEC_OACC_UPDATE: + case EXEC_OACC_WAIT: + case EXEC_OACC_CACHE: + case EXEC_OACC_ENTER_DATA: + case EXEC_OACC_EXIT_DATA: case EXEC_OMP_CANCEL: case EXEC_OMP_CANCELLATION_POINT: case EXEC_OMP_DISTRIBUTE: diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index 667ebadf6c5..cad9b5be3ba 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -5804,6 +5804,13 @@ gfc_generate_function_code (gfc_namespace * ns) if ((gfc_option.rtcheck & GFC_RTCHECK_BOUNDS) && !sym->attr.is_bind_c) add_argument_checking (&body, sym); + /* Generate !$ACC DECLARE directive. 
*/ + if (ns->oacc_declare_clauses) + { + tree tmp = gfc_trans_oacc_declare (&body, ns); + gfc_add_expr_to_block (&body, tmp); + } + tmp = gfc_trans_code (ns->code); gfc_add_expr_to_block (&body, tmp); diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index e77c1915452..fe47a966108 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. If not see #include "trans-const.h" #include "arith.h" #include "omp-low.h" +#include "gomp-constants.h" int ompws_flags; @@ -1045,7 +1046,7 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p) return; tree orig_decl = decl; c4 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (c4) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_SET_MAP_KIND (c4, GOMP_MAP_POINTER); OMP_CLAUSE_DECL (c4) = decl; OMP_CLAUSE_SIZE (c4) = size_int (0); decl = build_fold_indirect_ref (decl); @@ -1056,7 +1057,7 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p) || GFC_DECL_GET_SCALAR_ALLOCATABLE (orig_decl))) { c3 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (c3) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_SET_MAP_KIND (c3, GOMP_MAP_POINTER); OMP_CLAUSE_DECL (c3) = unshare_expr (decl); OMP_CLAUSE_SIZE (c3) = size_int (0); decl = build_fold_indirect_ref (decl); @@ -1073,11 +1074,11 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p) ptr = build_fold_indirect_ref (ptr); OMP_CLAUSE_DECL (c) = ptr; c2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (c2) = OMP_CLAUSE_MAP_TO_PSET; + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_TO_PSET); OMP_CLAUSE_DECL (c2) = decl; OMP_CLAUSE_SIZE (c2) = TYPE_SIZE_UNIT (type); c3 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (c3) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_SET_MAP_KIND (c3, GOMP_MAP_POINTER); OMP_CLAUSE_DECL (c3) = gfc_conv_descriptor_data_get (decl); OMP_CLAUSE_SIZE (c3) = size_int (0); tree size = 
create_tmp_var (gfc_array_index_type); @@ -1718,6 +1719,21 @@ gfc_trans_omp_reduction_list (gfc_omp_namelist *namelist, tree list, return list; } +static inline tree +gfc_convert_expr_to_tree (stmtblock_t *block, gfc_expr *expr) +{ + gfc_se se; + tree result; + + gfc_init_se (&se, NULL ); + gfc_conv_expr (&se, expr); + gfc_add_block_to_block (block, &se.pre); + result = gfc_evaluate_now (se.expr, block); + gfc_add_block_to_block (block, &se.post); + + return result; +} + static tree gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, locus where, bool declare_simd = false) @@ -1761,7 +1777,17 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, goto add_clause; case OMP_LIST_UNIFORM: clause_code = OMP_CLAUSE_UNIFORM; - /* FALLTHROUGH */ + goto add_clause; + case OMP_LIST_USE_DEVICE: + clause_code = OMP_CLAUSE_USE_DEVICE; + goto add_clause; + case OMP_LIST_DEVICE_RESIDENT: + clause_code = OMP_CLAUSE_DEVICE_RESIDENT; + goto add_clause; + case OMP_LIST_CACHE: + clause_code = OMP_CLAUSE__CACHE_; + goto add_clause; + add_clause: omp_clauses = gfc_trans_omp_variable_list (clause_code, n, omp_clauses, @@ -1928,7 +1954,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, tree orig_decl = decl; node4 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (node4) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER); OMP_CLAUSE_DECL (node4) = decl; OMP_CLAUSE_SIZE (node4) = size_int (0); decl = build_fold_indirect_ref (decl); @@ -1938,7 +1964,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, { node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (node3) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_POINTER); OMP_CLAUSE_DECL (node3) = decl; OMP_CLAUSE_SIZE (node3) = size_int (0); decl = build_fold_indirect_ref (decl); @@ -1954,12 +1980,12 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses 
*clauses, OMP_CLAUSE_DECL (node) = ptr; node2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (node2) = OMP_CLAUSE_MAP_TO_PSET; + OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_TO_PSET); OMP_CLAUSE_DECL (node2) = decl; OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type); node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (node3) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_POINTER); OMP_CLAUSE_DECL (node3) = gfc_conv_descriptor_data_get (decl); OMP_CLAUSE_SIZE (node3) = size_int (0); @@ -2045,7 +2071,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, { node4 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (node4) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER); OMP_CLAUSE_DECL (node4) = decl; OMP_CLAUSE_SIZE (node4) = size_int (0); decl = build_fold_indirect_ref (decl); @@ -2057,12 +2083,12 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, ptr2 = gfc_conv_descriptor_data_get (decl); node2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (node2) = OMP_CLAUSE_MAP_TO_PSET; + OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_TO_PSET); OMP_CLAUSE_DECL (node2) = decl; OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type); node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (node3) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_POINTER); OMP_CLAUSE_DECL (node3) = gfc_conv_descriptor_data_get (decl); } @@ -2077,7 +2103,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, } node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_MAP_KIND (node3) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_POINTER); OMP_CLAUSE_DECL (node3) = decl; } ptr2 = fold_convert (sizetype, ptr2); @@ -2087,16 +2113,37 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, switch (n->u.map_op) { 
case OMP_MAP_ALLOC: - OMP_CLAUSE_MAP_KIND (node) = OMP_CLAUSE_MAP_ALLOC; + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_ALLOC); break; case OMP_MAP_TO: - OMP_CLAUSE_MAP_KIND (node) = OMP_CLAUSE_MAP_TO; + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_TO); break; case OMP_MAP_FROM: - OMP_CLAUSE_MAP_KIND (node) = OMP_CLAUSE_MAP_FROM; + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FROM); break; case OMP_MAP_TOFROM: - OMP_CLAUSE_MAP_KIND (node) = OMP_CLAUSE_MAP_TOFROM; + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_TOFROM); + break; + case OMP_MAP_FORCE_ALLOC: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_ALLOC); + break; + case OMP_MAP_FORCE_DEALLOC: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_DEALLOC); + break; + case OMP_MAP_FORCE_TO: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_TO); + break; + case OMP_MAP_FORCE_FROM: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_FROM); + break; + case OMP_MAP_FORCE_TOFROM: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_TOFROM); + break; + case OMP_MAP_FORCE_PRESENT: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_PRESENT); + break; + case OMP_MAP_FORCE_DEVICEPTR: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_DEVICEPTR); break; default: gcc_unreachable (); @@ -2463,6 +2510,111 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, omp_clauses = gfc_trans_add_clause (c, omp_clauses); } + if (clauses->async) + { + c = build_omp_clause (where.lb->location, OMP_CLAUSE_ASYNC); + if (clauses->async_expr) + OMP_CLAUSE_ASYNC_EXPR (c) + = gfc_convert_expr_to_tree (block, clauses->async_expr); + else + OMP_CLAUSE_ASYNC_EXPR (c) = NULL; + omp_clauses = gfc_trans_add_clause (c, omp_clauses); + } + if (clauses->seq) + { + c = build_omp_clause (where.lb->location, OMP_CLAUSE_ORDERED); + omp_clauses = gfc_trans_add_clause (c, omp_clauses); + } + if (clauses->independent) + { + c = build_omp_clause (where.lb->location, OMP_CLAUSE_INDEPENDENT); + omp_clauses = gfc_trans_add_clause (c, omp_clauses); + } + if (clauses->wait_list) + { + 
gfc_expr_list *el; + + for (el = clauses->wait_list; el; el = el->next) + { + c = build_omp_clause (where.lb->location, OMP_CLAUSE_WAIT); + OMP_CLAUSE_DECL (c) = gfc_convert_expr_to_tree (block, el->expr); + OMP_CLAUSE_CHAIN (c) = omp_clauses; + omp_clauses = c; + } + } + if (clauses->num_gangs_expr) + { + tree num_gangs_var + = gfc_convert_expr_to_tree (block, clauses->num_gangs_expr); + c = build_omp_clause (where.lb->location, OMP_CLAUSE_NUM_GANGS); + OMP_CLAUSE_NUM_GANGS_EXPR (c) = num_gangs_var; + omp_clauses = gfc_trans_add_clause (c, omp_clauses); + } + if (clauses->num_workers_expr) + { + tree num_workers_var + = gfc_convert_expr_to_tree (block, clauses->num_workers_expr); + c = build_omp_clause (where.lb->location, OMP_CLAUSE_NUM_WORKERS); + OMP_CLAUSE_NUM_WORKERS_EXPR (c) = num_workers_var; + omp_clauses = gfc_trans_add_clause (c, omp_clauses); + } + if (clauses->vector_length_expr) + { + tree vector_length_var + = gfc_convert_expr_to_tree (block, clauses->vector_length_expr); + c = build_omp_clause (where.lb->location, OMP_CLAUSE_VECTOR_LENGTH); + OMP_CLAUSE_VECTOR_LENGTH_EXPR (c) = vector_length_var; + omp_clauses = gfc_trans_add_clause (c, omp_clauses); + } + if (clauses->vector) + { + if (clauses->vector_expr) + { + tree vector_var + = gfc_convert_expr_to_tree (block, clauses->vector_expr); + c = build_omp_clause (where.lb->location, OMP_CLAUSE_VECTOR); + OMP_CLAUSE_VECTOR_EXPR (c) = vector_var; + omp_clauses = gfc_trans_add_clause (c, omp_clauses); + } + else + { + c = build_omp_clause (where.lb->location, OMP_CLAUSE_VECTOR); + omp_clauses = gfc_trans_add_clause (c, omp_clauses); + } + } + if (clauses->worker) + { + if (clauses->worker_expr) + { + tree worker_var + = gfc_convert_expr_to_tree (block, clauses->worker_expr); + c = build_omp_clause (where.lb->location, OMP_CLAUSE_WORKER); + OMP_CLAUSE_WORKER_EXPR (c) = worker_var; + omp_clauses = gfc_trans_add_clause (c, omp_clauses); + } + else + { + c = build_omp_clause (where.lb->location, 
OMP_CLAUSE_WORKER); + omp_clauses = gfc_trans_add_clause (c, omp_clauses); + } + } + if (clauses->gang) + { + if (clauses->gang_expr) + { + tree gang_var + = gfc_convert_expr_to_tree (block, clauses->gang_expr); + c = build_omp_clause (where.lb->location, OMP_CLAUSE_GANG); + OMP_CLAUSE_GANG_EXPR (c) = gang_var; + omp_clauses = gfc_trans_add_clause (c, omp_clauses); + } + else + { + c = build_omp_clause (where.lb->location, OMP_CLAUSE_GANG); + omp_clauses = gfc_trans_add_clause (c, omp_clauses); + } + } + return nreverse (omp_clauses); } @@ -2490,6 +2642,115 @@ gfc_trans_omp_code (gfc_code *code, bool force_empty) return stmt; } +/* Trans OpenACC directives. */ +/* parallel, kernels, data and host_data. */ +static tree +gfc_trans_oacc_construct (gfc_code *code) +{ + stmtblock_t block; + tree stmt, oacc_clauses; + enum tree_code construct_code; + + switch (code->op) + { + case EXEC_OACC_PARALLEL: + construct_code = OACC_PARALLEL; + break; + case EXEC_OACC_KERNELS: + construct_code = OACC_KERNELS; + break; + case EXEC_OACC_DATA: + construct_code = OACC_DATA; + break; + case EXEC_OACC_HOST_DATA: + construct_code = OACC_HOST_DATA; + break; + default: + gcc_unreachable (); + } + + gfc_start_block (&block); + oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses, + code->loc); + stmt = gfc_trans_omp_code (code->block->next, true); + stmt = build2_loc (input_location, construct_code, void_type_node, stmt, + oacc_clauses); + gfc_add_expr_to_block (&block, stmt); + return gfc_finish_block (&block); +} + +/* update, enter_data, exit_data, cache. 
*/ +static tree +gfc_trans_oacc_executable_directive (gfc_code *code) +{ + stmtblock_t block; + tree stmt, oacc_clauses; + enum tree_code construct_code; + + switch (code->op) + { + case EXEC_OACC_UPDATE: + construct_code = OACC_UPDATE; + break; + case EXEC_OACC_ENTER_DATA: + construct_code = OACC_ENTER_DATA; + break; + case EXEC_OACC_EXIT_DATA: + construct_code = OACC_EXIT_DATA; + break; + case EXEC_OACC_CACHE: + construct_code = OACC_CACHE; + break; + default: + gcc_unreachable (); + } + + gfc_start_block (&block); + oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses, + code->loc); + stmt = build1_loc (input_location, construct_code, void_type_node, + oacc_clauses); + gfc_add_expr_to_block (&block, stmt); + return gfc_finish_block (&block); +} + +static tree +gfc_trans_oacc_wait_directive (gfc_code *code) +{ + stmtblock_t block; + tree stmt, t; + vec<tree, va_gc> *args; + int nparms = 0; + gfc_expr_list *el; + gfc_omp_clauses *clauses = code->ext.omp_clauses; + location_t loc = input_location; + + for (el = clauses->wait_list; el; el = el->next) + nparms++; + + vec_alloc (args, nparms + 2); + stmt = builtin_decl_explicit (BUILT_IN_GOACC_WAIT); + + gfc_start_block (&block); + + if (clauses->async_expr) + t = gfc_convert_expr_to_tree (&block, clauses->async_expr); + else + t = build_int_cst (integer_type_node, -2); + + args->quick_push (t); + args->quick_push (build_int_cst (integer_type_node, nparms)); + + for (el = clauses->wait_list; el; el = el->next) + args->quick_push (gfc_convert_expr_to_tree (&block, el->expr)); + + stmt = build_call_expr_loc_vec (loc, stmt, args); + gfc_add_expr_to_block (&block, stmt); + + vec_free (args); + + return gfc_finish_block (&block); +} static tree gfc_trans_omp_sections (gfc_code *, gfc_omp_clauses *); static tree gfc_trans_omp_workshare (gfc_code *, gfc_omp_clauses *); @@ -3115,6 +3376,7 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock, case EXEC_OMP_SIMD: stmt = make_node (OMP_SIMD); break; case 
EXEC_OMP_DO: stmt = make_node (OMP_FOR); break; case EXEC_OMP_DISTRIBUTE: stmt = make_node (OMP_DISTRIBUTE); break; + case EXEC_OACC_LOOP: stmt = make_node (OACC_LOOP); break; default: gcc_unreachable (); } @@ -3129,6 +3391,68 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock, return gfc_finish_block (&block); } +/* parallel loop and kernels loop. */ +static tree +gfc_trans_oacc_combined_directive (gfc_code *code) +{ + stmtblock_t block, *pblock = NULL; + gfc_omp_clauses construct_clauses, loop_clauses; + tree stmt, oacc_clauses = NULL_TREE; + enum tree_code construct_code; + + switch (code->op) + { + case EXEC_OACC_PARALLEL_LOOP: + construct_code = OACC_PARALLEL; + break; + case EXEC_OACC_KERNELS_LOOP: + construct_code = OACC_KERNELS; + break; + default: + gcc_unreachable (); + } + + gfc_start_block (&block); + + memset (&loop_clauses, 0, sizeof (loop_clauses)); + if (code->ext.omp_clauses != NULL) + { + memcpy (&construct_clauses, code->ext.omp_clauses, + sizeof (construct_clauses)); + loop_clauses.collapse = construct_clauses.collapse; + loop_clauses.gang = construct_clauses.gang; + loop_clauses.vector = construct_clauses.vector; + loop_clauses.worker = construct_clauses.worker; + loop_clauses.seq = construct_clauses.seq; + loop_clauses.independent = construct_clauses.independent; + construct_clauses.collapse = 0; + construct_clauses.gang = false; + construct_clauses.vector = false; + construct_clauses.worker = false; + construct_clauses.seq = false; + construct_clauses.independent = false; + oacc_clauses = gfc_trans_omp_clauses (&block, &construct_clauses, + code->loc); + } + if (!loop_clauses.seq) + pblock = &block; + else + pushlevel (); + stmt = gfc_trans_omp_do (code, code->op, pblock, &loop_clauses, NULL); + if (TREE_CODE (stmt) != BIND_EXPR) + stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0)); + else + poplevel (0, 0); + stmt = build2_loc (input_location, construct_code, void_type_node, stmt, + oacc_clauses); + if (code->op == 
EXEC_OACC_KERNELS_LOOP) + OACC_KERNELS_COMBINED (stmt) = 1; + else + OACC_PARALLEL_COMBINED (stmt) = 1; + gfc_add_expr_to_block (&block, stmt); + return gfc_finish_block (&block); +} + static tree gfc_trans_omp_flush (void) { @@ -4018,6 +4342,44 @@ gfc_trans_omp_workshare (gfc_code *code, gfc_omp_clauses *clauses) return stmt; } +tree +gfc_trans_oacc_declare (stmtblock_t *block, gfc_namespace *ns) +{ + tree oacc_clauses; + oacc_clauses = gfc_trans_omp_clauses (block, ns->oacc_declare_clauses, + ns->oacc_declare_clauses->loc); + return build1_loc (ns->oacc_declare_clauses->loc.lb->location, + OACC_DECLARE, void_type_node, oacc_clauses); +} + +tree +gfc_trans_oacc_directive (gfc_code *code) +{ + switch (code->op) + { + case EXEC_OACC_PARALLEL_LOOP: + case EXEC_OACC_KERNELS_LOOP: + return gfc_trans_oacc_combined_directive (code); + case EXEC_OACC_PARALLEL: + case EXEC_OACC_KERNELS: + case EXEC_OACC_DATA: + case EXEC_OACC_HOST_DATA: + return gfc_trans_oacc_construct (code); + case EXEC_OACC_LOOP: + return gfc_trans_omp_do (code, code->op, NULL, code->ext.omp_clauses, + NULL); + case EXEC_OACC_UPDATE: + case EXEC_OACC_CACHE: + case EXEC_OACC_ENTER_DATA: + case EXEC_OACC_EXIT_DATA: + return gfc_trans_oacc_executable_directive (code); + case EXEC_OACC_WAIT: + return gfc_trans_oacc_wait_directive (code); + default: + gcc_unreachable (); + } +} + tree gfc_trans_omp_directive (gfc_code *code) { diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c index 69a10657558..1a4099cadba 100644 --- a/gcc/fortran/trans-stmt.c +++ b/gcc/fortran/trans-stmt.c @@ -1378,6 +1378,14 @@ gfc_trans_block_construct (gfc_code* code) gfc_init_block (&body); exit_label = gfc_build_label_decl (NULL_TREE); code->exit_label = exit_label; + + /* Generate !$ACC DECLARE directive. 
*/ + if (ns->oacc_declare_clauses) + { + tree tmp = gfc_trans_oacc_declare (&body, ns); + gfc_add_expr_to_block (&body, tmp); + } + gfc_add_expr_to_block (&body, gfc_trans_code (ns->code)); gfc_add_expr_to_block (&body, build1_v (LABEL_EXPR, exit_label)); diff --git a/gcc/fortran/trans-stmt.h b/gcc/fortran/trans-stmt.h index 3814fc2fb56..2f2a0b3f5b5 100644 --- a/gcc/fortran/trans-stmt.h +++ b/gcc/fortran/trans-stmt.h @@ -65,6 +65,10 @@ tree gfc_trans_deallocate_array (tree); tree gfc_trans_omp_directive (gfc_code *); void gfc_trans_omp_declare_simd (gfc_namespace *); +/* trans-openacc.c */ +tree gfc_trans_oacc_directive (gfc_code *); +tree gfc_trans_oacc_declare (stmtblock_t *block, gfc_namespace *); + /* trans-io.c */ tree gfc_trans_open (gfc_code *); tree gfc_trans_close (gfc_code *); diff --git a/gcc/fortran/trans.c b/gcc/fortran/trans.c index c7aaee80e65..b749783fcaa 100644 --- a/gcc/fortran/trans.c +++ b/gcc/fortran/trans.c @@ -1900,6 +1900,21 @@ trans_code (gfc_code * code, tree cond) res = gfc_trans_omp_directive (code); break; + case EXEC_OACC_CACHE: + case EXEC_OACC_WAIT: + case EXEC_OACC_UPDATE: + case EXEC_OACC_LOOP: + case EXEC_OACC_HOST_DATA: + case EXEC_OACC_DATA: + case EXEC_OACC_KERNELS: + case EXEC_OACC_KERNELS_LOOP: + case EXEC_OACC_PARALLEL: + case EXEC_OACC_PARALLEL_LOOP: + case EXEC_OACC_ENTER_DATA: + case EXEC_OACC_EXIT_DATA: + res = gfc_trans_oacc_directive (code); + break; + default: gfc_internal_error ("gfc_trans_code(): Bad statement code"); } diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def index 5650155134a..fdae28db7aa 100644 --- a/gcc/fortran/types.def +++ b/gcc/fortran/types.def @@ -82,6 +82,7 @@ DEF_FUNCTION_TYPE_0 (BT_FN_VOID, BT_VOID) DEF_FUNCTION_TYPE_1 (BT_FN_VOID_PTR, BT_VOID, BT_PTR) DEF_FUNCTION_TYPE_1 (BT_FN_VOID_PTRPTR, BT_VOID, BT_PTR_PTR) DEF_FUNCTION_TYPE_1 (BT_FN_VOID_VPTR, BT_VOID, BT_VOLATILE_PTR) +DEF_FUNCTION_TYPE_1 (BT_FN_INT_INT, BT_INT, BT_INT) DEF_FUNCTION_TYPE_1 (BT_FN_UINT_UINT, BT_UINT, BT_UINT) 
DEF_FUNCTION_TYPE_1 (BT_FN_PTR_PTR, BT_PTR, BT_PTR) DEF_FUNCTION_TYPE_1 (BT_FN_VOID_INT, BT_VOID, BT_INT) @@ -209,3 +210,14 @@ DEF_FUNCTION_TYPE_8 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR, BT_BOOL, BT_UINT, BT_PTR) DEF_FUNCTION_TYPE_VAR_0 (BT_FN_VOID_VAR, BT_VOID) + +DEF_FUNCTION_TYPE_VAR_2 (BT_FN_VOID_INT_INT_VAR, BT_VOID, BT_INT, BT_INT) + +DEF_FUNCTION_TYPE_VAR_8 (BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR_INT_INT_VAR, + BT_VOID, BT_INT, BT_PTR, BT_SIZE, BT_PTR, BT_PTR, + BT_PTR, BT_INT, BT_INT) + +DEF_FUNCTION_TYPE_VAR_12 (BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR, + BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_PTR, BT_SIZE, + BT_PTR, BT_PTR, BT_PTR, BT_INT, BT_INT, BT_INT, + BT_INT, BT_INT) diff --git a/gcc/gcc.c b/gcc/gcc.c index 55d0ff2fd87..52d0521f764 100644 --- a/gcc/gcc.c +++ b/gcc/gcc.c @@ -829,7 +829,7 @@ proper position among the other output files. */ "%X %{o*} %{e*} %{N} %{n} %{r}\ %{s} %{t} %{u*} %{z} %{Z} %{!nostdlib:%{!nostartfiles:%S}} " VTABLE_VERIFICATION_SPEC " \ %{static:} %{L*} %(mfwrap) %(link_libgcc) " SANITIZER_EARLY_SPEC " %o\ - %{fopenmp|ftree-parallelize-loops=*:%:include(libgomp.spec)%(link_gomp)}\ + %{fopenacc|fopenmp|ftree-parallelize-loops=*:%:include(libgomp.spec)%(link_gomp)}\ %{fcilkplus:%:include(libcilkrts.spec)%(link_cilkrts)}\ %{fgnu-tm:%:include(libitm.spec)%(link_itm)}\ %(mflib) " STACK_SPLIT_SPEC "\ @@ -990,7 +990,8 @@ static const char *const multilib_defaults_raw[] = MULTILIB_DEFAULTS; /* Linking to libgomp implies pthreads. This is particularly important for targets that use different start files and suchlike. */ #ifndef GOMP_SELF_SPECS -#define GOMP_SELF_SPECS "%{fopenmp|ftree-parallelize-loops=*: -pthread}" +#define GOMP_SELF_SPECS "%{fopenacc|fopenmp|ftree-parallelize-loops=*: " \ + "-pthread}" #endif /* Likewise for -fgnu-tm. 
*/ diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c index 21e98c6d22f..2f9671f163b 100644 --- a/gcc/gimple-pretty-print.c +++ b/gcc/gimple-pretty-print.c @@ -1151,18 +1151,21 @@ dump_gimple_omp_for (pretty_printer *buffer, gomp_for *gs, int spc, int flags) case GF_OMP_FOR_KIND_FOR: kind = ""; break; - case GF_OMP_FOR_KIND_SIMD: - kind = " simd"; - break; - case GF_OMP_FOR_KIND_CILKSIMD: - kind = " cilksimd"; - break; case GF_OMP_FOR_KIND_DISTRIBUTE: kind = " distribute"; break; case GF_OMP_FOR_KIND_CILKFOR: kind = " _Cilk_for"; break; + case GF_OMP_FOR_KIND_OACC_LOOP: + kind = " oacc_loop"; + break; + case GF_OMP_FOR_KIND_SIMD: + kind = " simd"; + break; + case GF_OMP_FOR_KIND_CILKSIMD: + kind = " cilksimd"; + break; default: gcc_unreachable (); } @@ -1188,17 +1191,20 @@ dump_gimple_omp_for (pretty_printer *buffer, gomp_for *gs, int spc, int flags) case GF_OMP_FOR_KIND_FOR: pp_string (buffer, "#pragma omp for"); break; + case GF_OMP_FOR_KIND_DISTRIBUTE: + pp_string (buffer, "#pragma omp distribute"); + break; + case GF_OMP_FOR_KIND_CILKFOR: + break; + case GF_OMP_FOR_KIND_OACC_LOOP: + pp_string (buffer, "#pragma acc loop"); + break; case GF_OMP_FOR_KIND_SIMD: pp_string (buffer, "#pragma omp simd"); break; case GF_OMP_FOR_KIND_CILKSIMD: pp_string (buffer, "#pragma simd"); break; - case GF_OMP_FOR_KIND_DISTRIBUTE: - pp_string (buffer, "#pragma omp distribute"); - break; - case GF_OMP_FOR_KIND_CILKFOR: - break; default: gcc_unreachable (); } @@ -1344,6 +1350,21 @@ dump_gimple_omp_target (pretty_printer *buffer, gomp_target *gs, case GF_OMP_TARGET_KIND_UPDATE: kind = " update"; break; + case GF_OMP_TARGET_KIND_OACC_KERNELS: + kind = " oacc_kernels"; + break; + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + kind = " oacc_parallel"; + break; + case GF_OMP_TARGET_KIND_OACC_DATA: + kind = " oacc_data"; + break; + case GF_OMP_TARGET_KIND_OACC_UPDATE: + kind = " oacc_update"; + break; + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + kind = " 
oacc_enter_exit_data"; + break; default: gcc_unreachable (); } @@ -1352,7 +1373,9 @@ dump_gimple_omp_target (pretty_printer *buffer, gomp_target *gs, dump_gimple_fmt (buffer, spc, flags, "%G%s <%+BODY <%S>%nCLAUSES <", gs, kind, gimple_omp_body (gs)); dump_omp_clauses (buffer, gimple_omp_target_clauses (gs), spc, flags); - dump_gimple_fmt (buffer, spc, flags, " >"); + dump_gimple_fmt (buffer, spc, flags, " >, %T, %T%n>", + gimple_omp_target_child_fn (gs), + gimple_omp_target_data_arg (gs)); } else { @@ -1364,16 +1387,28 @@ dump_gimple_omp_target (pretty_printer *buffer, gomp_target *gs, pp_string (buffer, " [child fn: "); dump_generic_node (buffer, gimple_omp_target_child_fn (gs), spc, flags, false); - pp_right_bracket (buffer); + pp_string (buffer, " ("); + if (gimple_omp_target_data_arg (gs)) + dump_generic_node (buffer, gimple_omp_target_data_arg (gs), + spc, flags, false); + else + pp_string (buffer, "???"); + pp_string (buffer, ")]"); } - if (!gimple_seq_empty_p (gimple_omp_body (gs))) + gimple_seq body = gimple_omp_body (gs); + if (body && gimple_code (gimple_seq_first_stmt (body)) != GIMPLE_BIND) { newline_and_indent (buffer, spc + 2); - pp_character (buffer, '{'); + pp_left_brace (buffer); pp_newline (buffer); - dump_gimple_seq (buffer, gimple_omp_body (gs), spc + 4, flags); + dump_gimple_seq (buffer, body, spc + 4, flags); newline_and_indent (buffer, spc + 2); - pp_character (buffer, '}'); + pp_right_brace (buffer); + } + else if (body) + { + pp_newline (buffer); + dump_gimple_seq (buffer, body, spc + 2, flags); } } } diff --git a/gcc/gimple.c b/gcc/gimple.c index 9eb7b14fd73..caa1cbd2d09 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -872,8 +872,7 @@ gimple_build_omp_critical (gimple_seq body, tree name) BODY is sequence of statements inside the for loop. KIND is the `for' variant. - CLAUSES, are any of the OMP loop construct's clauses: private, firstprivate, - lastprivate, reductions, ordered, schedule, and nowait. 
+ CLAUSES, are any of the construct's clauses. COLLAPSE is the collapse count. PRE_BODY is the sequence of statements that are loop invariant. */ @@ -1088,7 +1087,8 @@ gimple_build_omp_single (gimple_seq body, tree clauses) /* Build a GIMPLE_OMP_TARGET statement. BODY is the sequence of statements that will be executed. - CLAUSES are any of the OMP target construct's clauses. */ + KIND is the kind of the region. + CLAUSES are any of the construct's clauses. */ gomp_target * gimple_build_omp_target (gimple_seq body, int kind, tree clauses) diff --git a/gcc/gimple.def b/gcc/gimple.def index 0c76ed0a6dd..96602df91fc 100644 --- a/gcc/gimple.def +++ b/gcc/gimple.def @@ -243,6 +243,9 @@ DEFGSCODE(GIMPLE_OMP_CRITICAL, "gimple_omp_critical", GSS_OMP_CRITICAL) for (INDEX = INITIAL; INDEX COND FINAL; INDEX {+=,-=} INCR) BODY + Likewise for: + #pragma acc loop [clause1 ... clauseN] + BODY is the loop body. CLAUSES is the list of clauses. @@ -269,7 +272,7 @@ DEFGSCODE(GIMPLE_OMP_CRITICAL, "gimple_omp_critical", GSS_OMP_CRITICAL) INITIAL, FINAL and INCR are required to be loop invariant integer expressions that are evaluated without any synchronization. The evaluation order, frequency of evaluation and side-effects are - unspecified by the standard. */ + unspecified by the standards. */ DEFGSCODE(GIMPLE_OMP_FOR, "gimple_omp_for", GSS_OMP_FOR) /* GIMPLE_OMP_MASTER represents #pragma omp master. @@ -354,11 +357,12 @@ DEFGSCODE(GIMPLE_OMP_SECTIONS_SWITCH, "gimple_omp_sections_switch", GSS_BASE) DEFGSCODE(GIMPLE_OMP_SINGLE, "gimple_omp_single", GSS_OMP_SINGLE_LAYOUT) /* GIMPLE_OMP_TARGET represents + #pragma acc {kernels,parallel,data,enter data,exit data,update} #pragma omp target {,data,update} - BODY is the sequence of statements inside the target construct - (NULL for target update). + BODY is the sequence of statements inside the construct + (NULL for some variants). CLAUSES is an OMP_CLAUSE chain holding the associated clauses. 
- CHILD_FN is set when outlining the body of the target region. + CHILD_FN is set when outlining the body of the offloaded region. All the statements in BODY are moved into this newly created function when converting OMP constructs into low-GIMPLE. DATA_ARG is a vec of 3 local variables in the parent function diff --git a/gcc/gimple.h b/gcc/gimple.h index d70e567d8e3..769bad01181 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -89,20 +89,26 @@ enum gf_mask { GF_CALL_CTRL_ALTERING = 1 << 7, GF_CALL_WITH_BOUNDS = 1 << 8, GF_OMP_PARALLEL_COMBINED = 1 << 0, - GF_OMP_FOR_KIND_MASK = 7 << 0, + GF_OMP_FOR_KIND_MASK = (1 << 3) - 1, GF_OMP_FOR_KIND_FOR = 0, GF_OMP_FOR_KIND_DISTRIBUTE = 1, GF_OMP_FOR_KIND_CILKFOR = 2, + GF_OMP_FOR_KIND_OACC_LOOP = 3, /* Flag for SIMD variants of OMP_FOR kinds. */ GF_OMP_FOR_SIMD = 1 << 2, GF_OMP_FOR_KIND_SIMD = GF_OMP_FOR_SIMD | 0, GF_OMP_FOR_KIND_CILKSIMD = GF_OMP_FOR_SIMD | 1, GF_OMP_FOR_COMBINED = 1 << 3, GF_OMP_FOR_COMBINED_INTO = 1 << 4, - GF_OMP_TARGET_KIND_MASK = (1 << 2) - 1, + GF_OMP_TARGET_KIND_MASK = (1 << 3) - 1, GF_OMP_TARGET_KIND_REGION = 0, GF_OMP_TARGET_KIND_DATA = 1, GF_OMP_TARGET_KIND_UPDATE = 2, + GF_OMP_TARGET_KIND_OACC_PARALLEL = 3, + GF_OMP_TARGET_KIND_OACC_KERNELS = 4, + GF_OMP_TARGET_KIND_OACC_DATA = 5, + GF_OMP_TARGET_KIND_OACC_UPDATE = 6, + GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA = 7, /* True on an GIMPLE_OMP_RETURN statement if the return does not require a thread synchronization via some sort of barrier. The exact barrier @@ -291,7 +297,7 @@ struct GTY((tag("GSS_CALL"))) }; -/* OpenMP statements (#pragma omp). */ +/* OMP statements. 
*/ struct GTY((tag("GSS_OMP"))) gimple_statement_omp : public gimple_statement_base @@ -552,7 +558,8 @@ struct GTY((tag("GSS_OMP_FOR"))) }; -/* GIMPLE_OMP_PARALLEL, GIMPLE_OMP_TARGET */ +/* GIMPLE_OMP_PARALLEL, GIMPLE_OMP_TARGET, GIMPLE_OMP_TASK */ + struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT"))) gimple_statement_omp_parallel_layout : public gimple_statement_omp { @@ -580,7 +587,6 @@ struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT"))) || stmt->code == GIMPLE_OMP_TASK. */ }; - /* GIMPLE_OMP_PARALLEL */ struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT"))) gomp_parallel : public gimple_statement_omp_taskreg @@ -589,6 +595,7 @@ struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT"))) stmt->code == GIMPLE_OMP_PARALLEL. */ }; +/* GIMPLE_OMP_TARGET */ struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT"))) gomp_target : public gimple_statement_omp_parallel_layout { @@ -4409,7 +4416,7 @@ gimple_omp_critical_set_name (gomp_critical *crit_stmt, tree name) } -/* Return the kind of OMP for statemement. */ +/* Return the kind of the OMP_FOR statemement G. */ static inline int gimple_omp_for_kind (const_gimple g) @@ -4419,7 +4426,7 @@ gimple_omp_for_kind (const_gimple g) } -/* Set the OMP for kind. */ +/* Set the kind of the OMP_FOR statement G. */ static inline void gimple_omp_for_set_kind (gomp_for *g, int kind) @@ -4429,7 +4436,7 @@ gimple_omp_for_set_kind (gomp_for *g, int kind) } -/* Return true if OMP for statement G has the +/* Return true if OMP_FOR statement G has the GF_OMP_FOR_COMBINED flag set. */ static inline bool @@ -4440,8 +4447,8 @@ gimple_omp_for_combined_p (const_gimple g) } -/* Set the GF_OMP_FOR_COMBINED field in G depending on the boolean - value of COMBINED_P. */ +/* Set the GF_OMP_FOR_COMBINED field in the OMP_FOR statement G depending on + the boolean value of COMBINED_P. 
*/ static inline void gimple_omp_for_set_combined_p (gomp_for *g, bool combined_p) @@ -4453,7 +4460,7 @@ gimple_omp_for_set_combined_p (gomp_for *g, bool combined_p) } -/* Return true if OMP for statement G has the +/* Return true if the OMP_FOR statement G has the GF_OMP_FOR_COMBINED_INTO flag set. */ static inline bool @@ -4464,8 +4471,8 @@ gimple_omp_for_combined_into_p (const_gimple g) } -/* Set the GF_OMP_FOR_COMBINED_INTO field in G depending on the boolean - value of COMBINED_P. */ +/* Set the GF_OMP_FOR_COMBINED_INTO field in the OMP_FOR statement G depending + on the boolean value of COMBINED_P. */ static inline void gimple_omp_for_set_combined_into_p (gomp_for *g, bool combined_p) @@ -4477,7 +4484,7 @@ gimple_omp_for_set_combined_into_p (gomp_for *g, bool combined_p) } -/* Return the clauses associated with OMP_FOR GS. */ +/* Return the clauses associated with the OMP_FOR statement GS. */ static inline tree gimple_omp_for_clauses (const_gimple gs) @@ -4487,7 +4494,8 @@ gimple_omp_for_clauses (const_gimple gs) } -/* Return a pointer to the OMP_FOR GS. */ +/* Return a pointer to the clauses associated with the OMP_FOR statement + GS. */ static inline tree * gimple_omp_for_clauses_ptr (gimple gs) @@ -4497,7 +4505,8 @@ gimple_omp_for_clauses_ptr (gimple gs) } -/* Set CLAUSES to be the list of clauses associated with OMP_FOR GS. */ +/* Set CLAUSES to be the list of clauses associated with the OMP_FOR statement + GS. */ static inline void gimple_omp_for_set_clauses (gimple gs, tree clauses) @@ -4507,7 +4516,7 @@ gimple_omp_for_set_clauses (gimple gs, tree clauses) } -/* Get the collapse count of OMP_FOR GS. */ +/* Get the collapse count of the OMP_FOR statement GS. */ static inline size_t gimple_omp_for_collapse (gimple gs) @@ -4517,7 +4526,30 @@ gimple_omp_for_collapse (gimple gs) } -/* Return the index variable for OMP_FOR GS. */ +/* Return the condition code associated with the OMP_FOR statement GS. 
*/ + +static inline enum tree_code +gimple_omp_for_cond (const_gimple gs, size_t i) +{ + const gomp_for *omp_for_stmt = as_a (gs); + gcc_gimple_checking_assert (i < omp_for_stmt->collapse); + return omp_for_stmt->iter[i].cond; +} + + +/* Set COND to be the condition code for the OMP_FOR statement GS. */ + +static inline void +gimple_omp_for_set_cond (gimple gs, size_t i, enum tree_code cond) +{ + gomp_for *omp_for_stmt = as_a (gs); + gcc_gimple_checking_assert (TREE_CODE_CLASS (cond) == tcc_comparison + && i < omp_for_stmt->collapse); + omp_for_stmt->iter[i].cond = cond; +} + + +/* Return the index variable for the OMP_FOR statement GS. */ static inline tree gimple_omp_for_index (const_gimple gs, size_t i) @@ -4528,7 +4560,7 @@ gimple_omp_for_index (const_gimple gs, size_t i) } -/* Return a pointer to the index variable for OMP_FOR GS. */ +/* Return a pointer to the index variable for the OMP_FOR statement GS. */ static inline tree * gimple_omp_for_index_ptr (gimple gs, size_t i) @@ -4539,7 +4571,7 @@ gimple_omp_for_index_ptr (gimple gs, size_t i) } -/* Set INDEX to be the index variable for OMP_FOR GS. */ +/* Set INDEX to be the index variable for the OMP_FOR statement GS. */ static inline void gimple_omp_for_set_index (gimple gs, size_t i, tree index) @@ -4550,7 +4582,7 @@ gimple_omp_for_set_index (gimple gs, size_t i, tree index) } -/* Return the initial value for OMP_FOR GS. */ +/* Return the initial value for the OMP_FOR statement GS. */ static inline tree gimple_omp_for_initial (const_gimple gs, size_t i) @@ -4561,7 +4593,7 @@ gimple_omp_for_initial (const_gimple gs, size_t i) } -/* Return a pointer to the initial value for OMP_FOR GS. */ +/* Return a pointer to the initial value for the OMP_FOR statement GS. */ static inline tree * gimple_omp_for_initial_ptr (gimple gs, size_t i) @@ -4572,7 +4604,7 @@ gimple_omp_for_initial_ptr (gimple gs, size_t i) } -/* Set INITIAL to be the initial value for OMP_FOR GS. 
*/ +/* Set INITIAL to be the initial value for the OMP_FOR statement GS. */ static inline void gimple_omp_for_set_initial (gimple gs, size_t i, tree initial) @@ -4583,7 +4615,7 @@ gimple_omp_for_set_initial (gimple gs, size_t i, tree initial) } -/* Return the final value for OMP_FOR GS. */ +/* Return the final value for the OMP_FOR statement GS. */ static inline tree gimple_omp_for_final (const_gimple gs, size_t i) @@ -4594,7 +4626,7 @@ gimple_omp_for_final (const_gimple gs, size_t i) } -/* Return a pointer to the final value for OMP_FOR GS. */ +/* Return a pointer to the final value for the OMP_FOR statement GS. */ static inline tree * gimple_omp_for_final_ptr (gimple gs, size_t i) @@ -4605,7 +4637,7 @@ gimple_omp_for_final_ptr (gimple gs, size_t i) } -/* Set FINAL to be the final value for OMP_FOR GS. */ +/* Set FINAL to be the final value for the OMP_FOR statement GS. */ static inline void gimple_omp_for_set_final (gimple gs, size_t i, tree final) @@ -4616,7 +4648,7 @@ gimple_omp_for_set_final (gimple gs, size_t i, tree final) } -/* Return the increment value for OMP_FOR GS. */ +/* Return the increment value for the OMP_FOR statement GS. */ static inline tree gimple_omp_for_incr (const_gimple gs, size_t i) @@ -4627,7 +4659,7 @@ gimple_omp_for_incr (const_gimple gs, size_t i) } -/* Return a pointer to the increment value for OMP_FOR GS. */ +/* Return a pointer to the increment value for the OMP_FOR statement GS. */ static inline tree * gimple_omp_for_incr_ptr (gimple gs, size_t i) @@ -4638,7 +4670,7 @@ gimple_omp_for_incr_ptr (gimple gs, size_t i) } -/* Set INCR to be the increment value for OMP_FOR GS. */ +/* Set INCR to be the increment value for the OMP_FOR statement GS. */ static inline void gimple_omp_for_set_incr (gimple gs, size_t i, tree incr) @@ -5109,7 +5141,7 @@ gimple_omp_target_set_clauses (gomp_target *omp_target_stmt, } -/* Return the kind of OMP target statemement. */ +/* Return the kind of the OMP_TARGET G. 
*/ static inline int gimple_omp_target_kind (const_gimple g) @@ -5119,7 +5151,7 @@ gimple_omp_target_kind (const_gimple g) } -/* Set the OMP target kind. */ +/* Set the kind of the OMP_TARGET G. */ static inline void gimple_omp_target_set_kind (gomp_target *g, int kind) @@ -5279,29 +5311,6 @@ gimple_omp_sections_set_control (gimple gs, tree control) } -/* Set COND to be the condition code for OMP_FOR GS. */ - -static inline void -gimple_omp_for_set_cond (gimple gs, size_t i, enum tree_code cond) -{ - gomp_for *omp_for_stmt = as_a (gs); - gcc_gimple_checking_assert (TREE_CODE_CLASS (cond) == tcc_comparison - && i < omp_for_stmt->collapse); - omp_for_stmt->iter[i].cond = cond; -} - - -/* Return the condition code associated with OMP_FOR GS. */ - -static inline enum tree_code -gimple_omp_for_cond (const_gimple gs, size_t i) -{ - const gomp_for *omp_for_stmt = as_a (gs); - gcc_gimple_checking_assert (i < omp_for_stmt->collapse); - return omp_for_stmt->iter[i].cond; -} - - /* Set the value being stored in an atomic store. */ static inline void @@ -5547,7 +5556,7 @@ gimple_return_set_retbnd (gimple gs, tree retval) } -/* Returns true when the gimple statement STMT is any of the OpenMP types. */ +/* Returns true when the gimple statement STMT is any of the OMP types. */ #define CASE_GIMPLE_OMP \ case GIMPLE_OMP_PARALLEL: \ @@ -5580,6 +5589,64 @@ is_gimple_omp (const_gimple stmt) } } +/* Return true if the OMP gimple statement STMT is any of the OpenACC types + specifically. 
*/ + +static inline bool +is_gimple_omp_oacc (const_gimple stmt) +{ + gcc_assert (is_gimple_omp (stmt)); + switch (gimple_code (stmt)) + { + case GIMPLE_OMP_FOR: + switch (gimple_omp_for_kind (stmt)) + { + case GF_OMP_FOR_KIND_OACC_LOOP: + return true; + default: + return false; + } + case GIMPLE_OMP_TARGET: + switch (gimple_omp_target_kind (stmt)) + { + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + case GF_OMP_TARGET_KIND_OACC_KERNELS: + case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_UPDATE: + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + return true; + default: + return false; + } + default: + return false; + } +} + + +/* Return true if the OMP gimple statement STMT is offloaded. */ + +static inline bool +is_gimple_omp_offloaded (const_gimple stmt) +{ + gcc_assert (is_gimple_omp (stmt)); + switch (gimple_code (stmt)) + { + case GIMPLE_OMP_TARGET: + switch (gimple_omp_target_kind (stmt)) + { + case GF_OMP_TARGET_KIND_REGION: + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + case GF_OMP_TARGET_KIND_OACC_KERNELS: + return true; + default: + return false; + } + default: + return false; + } +} + /* Returns TRUE if statement G is a GIMPLE_NOP. */ diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 2472d86002c..3a34f9224dc 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -86,6 +86,7 @@ along with GCC; see the file COPYING3. If not see #include "omp-low.h" #include "gimple-low.h" #include "cilk.h" +#include "gomp-constants.h" #include "langhooks-def.h" /* FIXME: for lhd_set_decl_assembler_name */ #include "tree-pass.h" /* FIXME: only for PROP_gimple_any */ @@ -106,7 +107,10 @@ enum gimplify_omp_var_data GOVD_PRIVATE_OUTER_REF = 1024, GOVD_LINEAR = 2048, GOVD_ALIGNED = 4096, + + /* Flag for GOVD_MAP: don't copy back. 
*/ GOVD_MAP_TO_ONLY = 8192, + GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR | GOVD_LOCAL) @@ -122,7 +126,9 @@ enum omp_region_type ORT_TASK = 4, ORT_UNTIED_TASK = 5, ORT_TEAMS = 8, + /* Data region. */ ORT_TARGET_DATA = 16, + /* Data region with offloading. */ ORT_TARGET = 32 }; @@ -1560,9 +1566,10 @@ gimplify_case_label_expr (tree *expr_p, gimple_seq *pre_p) struct gimplify_ctx *ctxp; glabel *label_stmt; - /* Invalid OpenMP programs can play Duff's Device type games with + /* Invalid programs can play Duff's Device type games with, for example, #pragma omp parallel. At least in the C front end, we don't - detect such invalid branches until after gimplification. */ + detect such invalid branches until after gimplification, in the + diagnose_omp_blocks pass. */ for (ctxp = gimplify_ctxp; ; ctxp = ctxp->prev_context) if (ctxp->case_labels.exists ()) break; @@ -1791,7 +1798,7 @@ gimplify_var_or_parm_decl (tree *expr_p) return GS_ERROR; } - /* When within an OpenMP context, notice uses of variables. */ + /* When within an OMP context, notice uses of variables. */ if (gimplify_omp_ctxp && omp_notice_variable (gimplify_omp_ctxp, decl, true)) return GS_ALL_DONE; @@ -2260,7 +2267,7 @@ gimplify_arg (tree *arg_p, gimple_seq *pre_p, location_t call_location) return gimplify_expr (arg_p, pre_p, NULL, test, fb); } -/* Don't fold STMT inside ORT_TARGET, because it can break code by adding decl +/* Don't fold inside offloading regions: it can break code by adding decl references that weren't in the source. We'll do it during omplower pass instead. 
*/ @@ -4451,11 +4458,21 @@ is_gimple_stmt (tree t) case CATCH_EXPR: case ASM_EXPR: case STATEMENT_LIST: + case OACC_PARALLEL: + case OACC_KERNELS: + case OACC_DATA: + case OACC_HOST_DATA: + case OACC_DECLARE: + case OACC_UPDATE: + case OACC_ENTER_DATA: + case OACC_EXIT_DATA: + case OACC_CACHE: case OMP_PARALLEL: case OMP_FOR: case OMP_SIMD: case CILK_SIMD: case OMP_DISTRIBUTE: + case OACC_LOOP: case OMP_SECTIONS: case OMP_SECTION: case OMP_SINGLE: @@ -5582,7 +5599,7 @@ omp_firstprivatize_type_sizes (struct gimplify_omp_ctx *ctx, tree type) lang_hooks.types.omp_firstprivatize_type_sizes (ctx, type); } -/* Add an entry for DECL in the OpenMP context CTX with FLAGS. */ +/* Add an entry for DECL in the OMP context CTX with FLAGS. */ static void omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags) @@ -5627,9 +5644,12 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags) copy into or out of the context. */ if (!(flags & GOVD_LOCAL)) { - nflags = flags & GOVD_MAP - ? GOVD_MAP | GOVD_MAP_TO_ONLY | GOVD_EXPLICIT - : flags & GOVD_PRIVATE ? GOVD_PRIVATE : GOVD_FIRSTPRIVATE; + if (flags & GOVD_MAP) + nflags = GOVD_MAP | GOVD_MAP_TO_ONLY | GOVD_EXPLICIT; + else if (flags & GOVD_PRIVATE) + nflags = GOVD_PRIVATE; + else + nflags = GOVD_FIRSTPRIVATE; nflags |= flags & GOVD_SEEN; t = DECL_VALUE_EXPR (decl); gcc_assert (TREE_CODE (t) == INDIRECT_REF); @@ -5683,7 +5703,7 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags) splay_tree_insert (ctx->variables, (splay_tree_key)decl, flags); } -/* Notice a threadprivate variable DECL used in OpenMP context CTX. +/* Notice a threadprivate variable DECL used in OMP context CTX. This just prints out diagnostics about threadprivate variable uses in untied tasks. If DECL2 is non-NULL, prevent this warning on that variable. 
*/ @@ -5725,7 +5745,7 @@ omp_notice_threadprivate_variable (struct gimplify_omp_ctx *ctx, tree decl, return false; } -/* Record the fact that DECL was used within the OpenMP context CTX. +/* Record the fact that DECL was used within the OMP context CTX. IN_CODE is true when real code uses DECL, and false when we should merely emit default(none) errors. Return true if DECL is going to be remapped and thus DECL shouldn't be gimplified into its @@ -6006,7 +6026,7 @@ omp_check_private (struct gimplify_omp_ctx *ctx, tree decl, bool copyprivate) return false; } -/* Scan the OpenMP clauses in *LIST_P, installing mappings into a new +/* Scan the OMP clauses in *LIST_P, installing mappings into a new and previous omp contexts. */ static void @@ -6117,6 +6137,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: + case OMP_CLAUSE__CACHE_: decl = OMP_CLAUSE_DECL (c); if (error_operand_p (decl)) { @@ -6282,15 +6303,35 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, case OMP_CLAUSE_DIST_SCHEDULE: case OMP_CLAUSE_DEVICE: case OMP_CLAUSE__CILK_FOR_COUNT_: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: + case OMP_CLAUSE_NUM_GANGS: + case OMP_CLAUSE_NUM_WORKERS: + case OMP_CLAUSE_VECTOR_LENGTH: + case OMP_CLAUSE_GANG: + case OMP_CLAUSE_WORKER: + case OMP_CLAUSE_VECTOR: if (gimplify_expr (&OMP_CLAUSE_OPERAND (c, 0), pre_p, NULL, is_gimple_val, fb_rvalue) == GS_ERROR) remove = true; + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_GANG + && gimplify_expr (&OMP_CLAUSE_OPERAND (c, 1), pre_p, NULL, + is_gimple_val, fb_rvalue) == GS_ERROR) + remove = true; + break; + + case OMP_CLAUSE_DEVICE_RESIDENT: + case OMP_CLAUSE_USE_DEVICE: + case OMP_CLAUSE_INDEPENDENT: + remove = true; break; case OMP_CLAUSE_NOWAIT: case OMP_CLAUSE_ORDERED: case OMP_CLAUSE_UNTIED: case OMP_CLAUSE_COLLAPSE: + case OMP_CLAUSE_AUTO: + case OMP_CLAUSE_SEQ: case OMP_CLAUSE_MERGEABLE: case OMP_CLAUSE_PROC_BIND: case OMP_CLAUSE_SAFELEN: @@ -6411,9 +6452,10 @@ 
gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data) OMP_CLAUSE_PRIVATE_OUTER_REF (clause) = 1; else if (code == OMP_CLAUSE_MAP) { - OMP_CLAUSE_MAP_KIND (clause) = flags & GOVD_MAP_TO_ONLY - ? OMP_CLAUSE_MAP_TO - : OMP_CLAUSE_MAP_TOFROM; + OMP_CLAUSE_SET_MAP_KIND (clause, + flags & GOVD_MAP_TO_ONLY + ? GOMP_MAP_TO + : GOMP_MAP_TOFROM); if (DECL_SIZE (decl) && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST) { @@ -6434,7 +6476,7 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data) OMP_CLAUSE_MAP); OMP_CLAUSE_DECL (nc) = decl; OMP_CLAUSE_SIZE (nc) = size_zero_node; - OMP_CLAUSE_MAP_KIND (nc) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_SET_MAP_KIND (nc, GOMP_MAP_POINTER); OMP_CLAUSE_CHAIN (nc) = OMP_CLAUSE_CHAIN (clause); OMP_CLAUSE_CHAIN (clause) = nc; } @@ -6584,8 +6626,13 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, tree *list_p) remove = true; else if (DECL_SIZE (decl) && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST - && OMP_CLAUSE_MAP_KIND (c) != OMP_CLAUSE_MAP_POINTER) + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_POINTER) { + /* For GOMP_MAP_FORCE_DEVICEPTR, we'll never enter here, because + for these, TREE_CODE (DECL_SIZE (decl)) will always be + INTEGER_CST. 
*/ + gcc_assert (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FORCE_DEVICEPTR); + tree decl2 = DECL_VALUE_EXPR (decl); gcc_assert (TREE_CODE (decl2) == INDIRECT_REF); decl2 = TREE_OPERAND (decl2, 0); @@ -6603,7 +6650,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, tree *list_p) OMP_CLAUSE_MAP); OMP_CLAUSE_DECL (nc) = decl; OMP_CLAUSE_SIZE (nc) = size_zero_node; - OMP_CLAUSE_MAP_KIND (nc) = OMP_CLAUSE_MAP_POINTER; + OMP_CLAUSE_SET_MAP_KIND (nc, GOMP_MAP_POINTER); OMP_CLAUSE_CHAIN (nc) = OMP_CLAUSE_CHAIN (c); OMP_CLAUSE_CHAIN (c) = nc; c = nc; @@ -6614,6 +6661,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, tree *list_p) case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: + case OMP_CLAUSE__CACHE_: decl = OMP_CLAUSE_DECL (c); if (!DECL_P (decl)) break; @@ -6659,6 +6707,19 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, tree *list_p) case OMP_CLAUSE_SAFELEN: case OMP_CLAUSE_DEPEND: case OMP_CLAUSE__CILK_FOR_COUNT_: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: + case OMP_CLAUSE_DEVICE_RESIDENT: + case OMP_CLAUSE_USE_DEVICE: + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_NUM_GANGS: + case OMP_CLAUSE_NUM_WORKERS: + case OMP_CLAUSE_VECTOR_LENGTH: + case OMP_CLAUSE_GANG: + case OMP_CLAUSE_WORKER: + case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_AUTO: + case OMP_CLAUSE_SEQ: break; default: @@ -6681,6 +6742,21 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, tree *list_p) delete_omp_context (ctx); } +/* Gimplify OACC_CACHE. */ + +static void +gimplify_oacc_cache (tree *expr_p, gimple_seq *pre_p) +{ + tree expr = *expr_p; + + gimplify_scan_omp_clauses (&OACC_CACHE_CLAUSES (expr), pre_p, ORT_WORKSHARE); + gimplify_adjust_omp_clauses (pre_p, &OACC_CACHE_CLAUSES (expr)); + + /* TODO: Do something sensible with this information. */ + + *expr_p = NULL_TREE; +} + /* Gimplify the contents of an OMP_PARALLEL statement. This involves gimplification of the body, as well as scanning the body for used variables. 
We need to do this scan now, because variable-sized @@ -6795,8 +6871,22 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p) orig_for_stmt = for_stmt = *expr_p; - simd = (TREE_CODE (for_stmt) == OMP_SIMD - || TREE_CODE (for_stmt) == CILK_SIMD); + switch (TREE_CODE (for_stmt)) + { + case OMP_FOR: + case CILK_FOR: + case OMP_DISTRIBUTE: + case OACC_LOOP: + simd = false; + break; + case OMP_SIMD: + case CILK_SIMD: + simd = true; + break; + default: + gcc_unreachable (); + } + gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p, simd ? ORT_SIMD : ORT_WORKSHARE); if (TREE_CODE (for_stmt) == OMP_DISTRIBUTE) @@ -6832,6 +6922,7 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p) if (OMP_FOR_INIT (for_stmt) == NULL_TREE) { + gcc_assert (TREE_CODE (for_stmt) != OACC_LOOP); for_stmt = walk_tree (&OMP_FOR_BODY (for_stmt), find_combined_omp_for, NULL, NULL); gcc_assert (for_stmt != NULL_TREE); @@ -7133,6 +7224,7 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p) case CILK_SIMD: kind = GF_OMP_FOR_KIND_CILKSIMD; break; case CILK_FOR: kind = GF_OMP_FOR_KIND_CILKFOR; break; case OMP_DISTRIBUTE: kind = GF_OMP_FOR_KIND_DISTRIBUTE; break; + case OACC_LOOP: kind = GF_OMP_FOR_KIND_OACC_LOOP; break; default: gcc_unreachable (); } @@ -7173,9 +7265,7 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p) return GS_ALL_DONE; } -/* Gimplify the gross structure of other OpenMP constructs. - In particular, OMP_SECTIONS, OMP_SINGLE, OMP_TARGET, OMP_TARGET_DATA - and OMP_TEAMS. */ +/* Gimplify the gross structure of several OMP constructs. 
*/ static void gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) @@ -7183,16 +7273,20 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) tree expr = *expr_p; gimple stmt; gimple_seq body = NULL; - enum omp_region_type ort = ORT_WORKSHARE; + enum omp_region_type ort; switch (TREE_CODE (expr)) { case OMP_SECTIONS: case OMP_SINGLE: + ort = ORT_WORKSHARE; break; + case OACC_KERNELS: + case OACC_PARALLEL: case OMP_TARGET: ort = ORT_TARGET; break; + case OACC_DATA: case OMP_TARGET_DATA: ort = ORT_TARGET_DATA; break; @@ -7213,9 +7307,21 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) pop_gimplify_context (NULL); if (ort == ORT_TARGET_DATA) { - gimple_seq cleanup = NULL; - tree fn = builtin_decl_explicit (BUILT_IN_GOMP_TARGET_END_DATA); + enum built_in_function end_ix; + switch (TREE_CODE (expr)) + { + case OACC_DATA: + end_ix = BUILT_IN_GOACC_DATA_END; + break; + case OMP_TARGET_DATA: + end_ix = BUILT_IN_GOMP_TARGET_END_DATA; + break; + default: + gcc_unreachable (); + } + tree fn = builtin_decl_explicit (end_ix); g = gimple_build_call (fn, 0); + gimple_seq cleanup = NULL; gimple_seq_add_stmt (&cleanup, g); g = gimple_build_try (body, cleanup, GIMPLE_TRY_FINALLY); body = NULL; @@ -7228,6 +7334,18 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) switch (TREE_CODE (expr)) { + case OACC_DATA: + stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_DATA, + OMP_CLAUSES (expr)); + break; + case OACC_KERNELS: + stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_KERNELS, + OMP_CLAUSES (expr)); + break; + case OACC_PARALLEL: + stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_PARALLEL, + OMP_CLAUSES (expr)); + break; case OMP_SECTIONS: stmt = gimple_build_omp_sections (body, OMP_CLAUSES (expr)); break; @@ -7253,19 +7371,40 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) *expr_p = NULL_TREE; } -/* Gimplify the gross structure of OpenMP target update construct. 
*/ +/* Gimplify the gross structure of OpenACC enter/exit data, update, and OpenMP + target update constructs. */ static void gimplify_omp_target_update (tree *expr_p, gimple_seq *pre_p) { - tree expr = *expr_p; + tree expr = *expr_p, clauses; + int kind; gomp_target *stmt; - gimplify_scan_omp_clauses (&OMP_TARGET_UPDATE_CLAUSES (expr), pre_p, - ORT_WORKSHARE); - gimplify_adjust_omp_clauses (pre_p, &OMP_TARGET_UPDATE_CLAUSES (expr)); - stmt = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_UPDATE, - OMP_TARGET_UPDATE_CLAUSES (expr)); + switch (TREE_CODE (expr)) + { + case OACC_ENTER_DATA: + clauses = OACC_ENTER_DATA_CLAUSES (expr); + kind = GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA; + break; + case OACC_EXIT_DATA: + clauses = OACC_EXIT_DATA_CLAUSES (expr); + kind = GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA; + break; + case OACC_UPDATE: + clauses = OACC_UPDATE_CLAUSES (expr); + kind = GF_OMP_TARGET_KIND_OACC_UPDATE; + break; + case OMP_TARGET_UPDATE: + clauses = OMP_TARGET_UPDATE_CLAUSES (expr); + kind = GF_OMP_TARGET_KIND_UPDATE; + break; + default: + gcc_unreachable (); + } + gimplify_scan_omp_clauses (&clauses, pre_p, ORT_WORKSHARE); + gimplify_adjust_omp_clauses (pre_p, &clauses); + stmt = gimple_build_omp_target (NULL, kind, clauses); gimplify_seq_add_stmt (pre_p, stmt); *expr_p = NULL_TREE; @@ -7445,7 +7584,7 @@ gimplify_transaction (tree *expr_p, gimple_seq *pre_p) int subcode = 0; /* Wrap the transaction body in a BIND_EXPR so we have a context - where to put decls for OpenMP. */ + where to put decls for OMP. */ if (TREE_CODE (tbody) != BIND_EXPR) { tree bind = build3 (BIND_EXPR, void_type_node, NULL, tbody, NULL); @@ -8182,7 +8321,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, break; case RESULT_DECL: - /* When within an OpenMP context, notice uses of variables. */ + /* When within an OMP context, notice uses of variables. 
*/ if (gimplify_omp_ctxp) omp_notice_variable (gimplify_omp_ctxp, *expr_p, true); ret = GS_ALL_DONE; @@ -8208,9 +8347,38 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, case CILK_SIMD: case CILK_FOR: case OMP_DISTRIBUTE: + case OACC_LOOP: ret = gimplify_omp_for (expr_p, pre_p); break; + case OACC_CACHE: + gimplify_oacc_cache (expr_p, pre_p); + ret = GS_ALL_DONE; + break; + + case OACC_HOST_DATA: + case OACC_DECLARE: + sorry ("directive not yet implemented"); + ret = GS_ALL_DONE; + break; + + case OACC_KERNELS: + if (OACC_KERNELS_COMBINED (*expr_p)) + sorry ("directive not yet implemented"); + else + gimplify_omp_workshare (expr_p, pre_p); + ret = GS_ALL_DONE; + break; + + case OACC_PARALLEL: + if (OACC_PARALLEL_COMBINED (*expr_p)) + sorry ("directive not yet implemented"); + else + gimplify_omp_workshare (expr_p, pre_p); + ret = GS_ALL_DONE; + break; + + case OACC_DATA: case OMP_SECTIONS: case OMP_SINGLE: case OMP_TARGET: @@ -8220,6 +8388,9 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, ret = GS_ALL_DONE; break; + case OACC_ENTER_DATA: + case OACC_EXIT_DATA: + case OACC_UPDATE: case OMP_TARGET_UPDATE: gimplify_omp_target_update (expr_p, pre_p); ret = GS_ALL_DONE; @@ -8601,8 +8772,18 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, && code != LOOP_EXPR && code != SWITCH_EXPR && code != TRY_FINALLY_EXPR + && code != OACC_PARALLEL + && code != OACC_KERNELS + && code != OACC_DATA + && code != OACC_HOST_DATA + && code != OACC_DECLARE + && code != OACC_UPDATE + && code != OACC_ENTER_DATA + && code != OACC_EXIT_DATA + && code != OACC_CACHE && code != OMP_CRITICAL && code != OMP_FOR + && code != OACC_LOOP && code != OMP_MASTER && code != OMP_TASKGROUP && code != OMP_ORDERED @@ -8829,7 +9010,7 @@ gimplify_body (tree fndecl, bool do_parms) gcc_assert (gimplify_ctxp == NULL); push_gimplify_context (); - if (flag_openmp) + if (flag_openacc || flag_openmp) { gcc_assert (gimplify_omp_ctxp == NULL); if 
(lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl))) @@ -8913,7 +9094,8 @@ gimplify_body (tree fndecl, bool do_parms) nonlocal_vlas = NULL; } - if ((flag_openmp || flag_openmp_simd) && gimplify_omp_ctxp) + if ((flag_openacc || flag_openmp || flag_openmp_simd) + && gimplify_omp_ctxp) { delete_omp_context (gimplify_omp_ctxp); gimplify_omp_ctxp = NULL; diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c index bc265c2b460..cced43b9713 100644 --- a/gcc/lto-streamer-out.c +++ b/gcc/lto-streamer-out.c @@ -82,6 +82,7 @@ along with GCC; see the file COPYING3. If not see #include "streamer-hooks.h" #include "cfgloop.h" #include "builtins.h" +#include "gomp-constants.h" static void lto_write_tree (struct output_block*, tree, bool); diff --git a/gcc/lto/ChangeLog b/gcc/lto/ChangeLog index 4eafaccc822..9b98edd6378 100644 --- a/gcc/lto/ChangeLog +++ b/gcc/lto/ChangeLog @@ -1,3 +1,10 @@ +2015-01-15 Thomas Schwinge + James Norris + + * lto-lang.c (DEF_FUNCTION_TYPE_VAR_8, DEF_FUNCTION_TYPE_VAR_12): + New macros. + * lto.c: Include "gomp-constants.h". 
+ 2015-01-14 Ilya Verbin * lto-partition.c (lto_promote_cross_file_statics): Remove argument diff --git a/gcc/lto/lto-lang.c b/gcc/lto/lto-lang.c index 31b0b5caaba..aa474e07d85 100644 --- a/gcc/lto/lto-lang.c +++ b/gcc/lto/lto-lang.c @@ -177,6 +177,11 @@ enum lto_builtin_type #define DEF_FUNCTION_TYPE_VAR_4(NAME, RETURN, ARG1, ARG2, ARG3, ARG4) NAME, #define DEF_FUNCTION_TYPE_VAR_5(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG6) \ NAME, +#define DEF_FUNCTION_TYPE_VAR_8(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8) NAME, +#define DEF_FUNCTION_TYPE_VAR_12(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8, ARG9, ARG10, ARG11, \ + ARG12) NAME, #define DEF_POINTER_TYPE(NAME, TYPE) NAME, #include "builtin-types.def" #undef DEF_PRIMITIVE_TYPE @@ -195,6 +200,8 @@ enum lto_builtin_type #undef DEF_FUNCTION_TYPE_VAR_3 #undef DEF_FUNCTION_TYPE_VAR_4 #undef DEF_FUNCTION_TYPE_VAR_5 +#undef DEF_FUNCTION_TYPE_VAR_8 +#undef DEF_FUNCTION_TYPE_VAR_12 #undef DEF_POINTER_TYPE BT_LAST }; @@ -679,6 +686,14 @@ lto_define_builtins (tree va_list_ref_type_node ATTRIBUTE_UNUSED, def_fn_type (ENUM, RETURN, 1, 4, ARG1, ARG2, ARG3, ARG4); #define DEF_FUNCTION_TYPE_VAR_5(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \ def_fn_type (ENUM, RETURN, 1, 5, ARG1, ARG2, ARG3, ARG4, ARG5); +#define DEF_FUNCTION_TYPE_VAR_8(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8) \ + def_fn_type (ENUM, RETURN, 1, 8, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, \ + ARG7, ARG8); +#define DEF_FUNCTION_TYPE_VAR_12(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ + ARG6, ARG7, ARG8, ARG9, ARG10, ARG11, ARG12) \ + def_fn_type (ENUM, RETURN, 1, 12, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, \ + ARG7, ARG8, ARG9, ARG10, ARG11, ARG12); #define DEF_POINTER_TYPE(ENUM, TYPE) \ builtin_types[(int) ENUM] = build_pointer_type (builtin_types[(int) TYPE]); @@ -700,6 +715,8 @@ lto_define_builtins (tree va_list_ref_type_node ATTRIBUTE_UNUSED, #undef DEF_FUNCTION_TYPE_VAR_3 #undef DEF_FUNCTION_TYPE_VAR_4 
#undef DEF_FUNCTION_TYPE_VAR_5 +#undef DEF_FUNCTION_TYPE_VAR_8 +#undef DEF_FUNCTION_TYPE_VAR_12 #undef DEF_POINTER_TYPE builtin_types[(int) BT_LAST] = NULL_TREE; diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c index 96e5fd18ac1..15d3f1040bb 100644 --- a/gcc/lto/lto.c +++ b/gcc/lto/lto.c @@ -77,6 +77,7 @@ along with GCC; see the file COPYING3. If not see #include "ipa-inline.h" #include "params.h" #include "ipa-utils.h" +#include "gomp-constants.h" /* Number of parallel tasks to run, -1 if we want to use GNU Make jobserver. */ diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index 47b6c6afc8a..6aea7b7a97f 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -1,5 +1,5 @@ /* This file contains the definitions and documentation for the - OpenMP builtins used in the GNU compiler. + Offloading and Multi Processing builtins used in the GNU compiler. Copyright (C) 2005-2015 Free Software Foundation, Inc. This file is part of GCC. @@ -20,10 +20,41 @@ along with GCC; see the file COPYING3. If not see /* Before including this file, you should define a macro: + DEF_GOACC_BUILTIN (ENUM, NAME, TYPE, ATTRS) + DEF_GOACC_BUILTIN_COMPILER (ENUM, NAME, TYPE, ATTRS) DEF_GOMP_BUILTIN (ENUM, NAME, TYPE, ATTRS) See builtins.def for details. */ +/* The reason why they aren't in gcc/builtins.def is that the Fortran front end + doesn't source those. 
*/ + +DEF_GOACC_BUILTIN (BUILT_IN_ACC_GET_DEVICE_TYPE, "acc_get_device_type", + BT_FN_INT, ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DATA_START, "GOACC_data_start", + BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DATA_END, "GOACC_data_end", + BT_FN_VOID, ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ENTER_EXIT_DATA, "GOACC_enter_exit_data", + BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR_INT_INT_VAR, + ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel", + BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR, + ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update", + BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR_INT_INT_VAR, + ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait", + BT_FN_VOID_INT_INT_VAR, + ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_THREAD_NUM, "GOACC_get_thread_num", + BT_FN_INT, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_NUM_THREADS, "GOACC_get_num_threads", + BT_FN_INT, ATTR_CONST_NOTHROW_LEAF_LIST) + +DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device", + BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST) + DEF_GOMP_BUILTIN (BUILT_IN_OMP_GET_THREAD_NUM, "omp_get_thread_num", BT_FN_INT, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_GOMP_BUILTIN (BUILT_IN_OMP_GET_NUM_THREADS, "omp_get_num_threads", diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 9ec3d497a30..b7bf338b702 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -1,6 +1,7 @@ -/* Lowering pass for OpenMP directives. Converts OpenMP directives - into explicit calls to the runtime library (libgomp) and data - marshalling to implement data sharing and copying clauses. +/* Lowering pass for OMP directives. Converts OMP directives into explicit + calls to the runtime library (libgomp), data marshalling to implement data + sharing and copying clauses, offloading to accelerators, and more. 
+ Contributed by Diego Novillo Copyright (C) 2005-2015 Free Software Foundation, Inc. @@ -106,9 +107,10 @@ along with GCC; see the file COPYING3. If not see #include "cilk.h" #include "context.h" #include "lto-section-names.h" +#include "gomp-constants.h" -/* Lowering of OpenMP parallel and workshare constructs proceeds in two +/* Lowering of OMP parallel and workshare constructs proceeds in two phases. The first phase scans the function looking for OMP statements and then for variables that must be replaced to satisfy data sharing clauses. The second phase expands code for the constructs, as well as @@ -116,10 +118,10 @@ along with GCC; see the file COPYING3. If not see expressions. Final code generation is done by pass_expand_omp. The flowgraph is - scanned for parallel regions which are then moved to a new - function, to be invoked by the thread library. */ + scanned for regions which are then moved to a new + function, to be invoked by the thread library, or offloaded. */ -/* Parallel region information. Every parallel and workshare +/* OMP region information. Every parallel and workshare directive is enclosed between two markers, the OMP_* directive and a corresponding OMP_RETURN statement. */ @@ -158,6 +160,12 @@ struct omp_region bool is_combined_parallel; }; +/* Levels of parallelism as defined by OpenACC. Increasing numbers + correspond to deeper loop nesting levels. */ +#define MASK_GANG 1 +#define MASK_WORKER 2 +#define MASK_VECTOR 4 + /* Context structure. Used to store information about each parallel directive in the code. */ @@ -192,6 +200,11 @@ typedef struct omp_context construct. In the case of a parallel, this is in the child function. */ tree block_vars; + /* A map of reduction pointer variables. For accelerators, each + reduction variable is replaced with an array. Each thread, in turn, + is assigned to a slot on that array. 
*/ + splay_tree reduction_map; + /* Label to which GOMP_cancel{,llation_point} and explicit and implicit barriers should jump to during omplower pass. */ tree cancel_label; @@ -210,8 +223,18 @@ typedef struct omp_context /* True if this construct can be cancelled. */ bool cancellable; + + /* For OpenACC loops, a mask of gang, worker and vector used at + levels below this one. */ + int gwv_below; + /* For OpenACC loops, a mask of gang, worker and vector used at + this level and above. For parallel and kernels clauses, a mask + indicating which of num_gangs/num_workers/vector_length was used. */ + int gwv_this; } omp_context; +/* A structure holding the elements of: + for (V = N1; V cond N2; V += STEP) [...] */ struct omp_for_data_loop { @@ -254,9 +277,93 @@ static tree scan_omp_1_op (tree *, int *, void *); *handled_ops_p = false; \ break; +/* Helper function to get the name of the array containing the partial + reductions for OpenACC reductions. */ +static const char * +oacc_get_reduction_array_id (tree node) +{ + const char *id = IDENTIFIER_POINTER (DECL_NAME (node)); + int len = strlen ("OACC") + strlen (id); + char *temp_name = XALLOCAVEC (char, len + 1); + snprintf (temp_name, len + 1, "OACC%s", id); + return IDENTIFIER_POINTER (get_identifier (temp_name)); +} + +/* Determine the number of OpenACC threads used to determine the + size of the array of partial reductions. Currently, this is num_gangs + * vector_length. This value may be different than GOACC_GET_NUM_THREADS, + because it is independent of the device used. */ + +static tree +oacc_max_threads (omp_context *ctx) +{ + tree nthreads, vector_length, gangs, clauses; + + gangs = fold_convert (sizetype, integer_one_node); + vector_length = gangs; + + /* The reduction clause may be nested inside a loop directive. + Scan for the innermost vector_length clause.
*/ + for (omp_context *oc = ctx; oc; oc = oc->outer) + { + if (gimple_code (oc->stmt) != GIMPLE_OMP_TARGET + || (gimple_omp_target_kind (oc->stmt) + != GF_OMP_TARGET_KIND_OACC_PARALLEL)) + continue; + + clauses = gimple_omp_target_clauses (oc->stmt); + + vector_length = find_omp_clause (clauses, OMP_CLAUSE_VECTOR_LENGTH); + if (vector_length) + vector_length = fold_convert_loc (OMP_CLAUSE_LOCATION (vector_length), + sizetype, + OMP_CLAUSE_VECTOR_LENGTH_EXPR + (vector_length)); + else + vector_length = fold_convert (sizetype, integer_one_node); + + gangs = find_omp_clause (clauses, OMP_CLAUSE_NUM_GANGS); + if (gangs) + gangs = fold_convert_loc (OMP_CLAUSE_LOCATION (gangs), sizetype, + OMP_CLAUSE_NUM_GANGS_EXPR (gangs)); + else + gangs = fold_convert (sizetype, integer_one_node); + + break; + } + + nthreads = fold_build2 (MULT_EXPR, sizetype, gangs, vector_length); + + return nthreads; +} + /* Holds offload tables with decls. */ vec *offload_funcs, *offload_vars; +/* Holds a decl for __OFFLOAD_TABLE__. */ +static GTY(()) tree offload_symbol_decl; + +/* Get the __OFFLOAD_TABLE__ symbol. */ +static tree +get_offload_symbol_decl (void) +{ + if (!offload_symbol_decl) + { + tree decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier ("__OFFLOAD_TABLE__"), + ptr_type_node); + TREE_ADDRESSABLE (decl) = 1; + TREE_PUBLIC (decl) = 1; + DECL_EXTERNAL (decl) = 1; + DECL_WEAK (decl) = 1; + DECL_ATTRIBUTES (decl) + = tree_cons (get_identifier ("weak"), + NULL_TREE, DECL_ATTRIBUTES (decl)); + offload_symbol_decl = decl; + } + return offload_symbol_decl; +} + /* Convenience function for calling scan_omp_1_op on tree operands. */ static inline tree @@ -275,7 +382,7 @@ static void lower_omp (gimple_seq *, omp_context *); static tree lookup_decl_in_outer_ctx (tree, omp_context *); static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *); -/* Find an OpenMP clause of type KIND within CLAUSES. */ +/* Find an OMP clause of type KIND within CLAUSES. 
*/ tree find_omp_clause (tree clauses, enum omp_clause_code kind) @@ -296,16 +403,6 @@ is_parallel_ctx (omp_context *ctx) } -/* Return true if CTX is for an omp target region. */ - -static inline bool -is_targetreg_ctx (omp_context *ctx) -{ - return gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET - && gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_REGION; -} - - /* Return true if CTX is for an omp task. */ static inline bool @@ -630,6 +727,15 @@ extract_omp_for_data (gomp_for *for_stmt, struct omp_for_data *fd, fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1); fd->loop.cond_code = LT_EXPR; } + + /* For OpenACC loops, force a chunk size of one, as this avoids the default + scheduling where several subsequent iterations are being executed by the + same thread. */ + if (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP) + { + gcc_assert (fd->chunk_size == NULL_TREE); + fd->chunk_size = build_int_cst (TREE_TYPE (fd->loop.v), 1); + } } @@ -858,7 +964,18 @@ is_reference (tree decl) return lang_hooks.decls.omp_privatize_by_reference (decl); } -/* Lookup variables in the decl or field splay trees. The "maybe" form +/* Return the type of a decl. If the decl is reference type, + return its base type. */ +static inline tree +get_base_type (tree decl) +{ + tree type = TREE_TYPE (decl); + if (is_reference (decl)) + type = TREE_TYPE (type); + return type; +} + +/* Lookup variables. The "maybe" form allows for the variable form to not have been entered, otherwise we assert that the variable must have been entered. */ @@ -902,6 +1019,23 @@ maybe_lookup_field (tree var, omp_context *ctx) return n ? 
(tree) n->value : NULL_TREE; } +static inline tree +lookup_oacc_reduction (const char *id, omp_context *ctx) +{ + splay_tree_node n; + n = splay_tree_lookup (ctx->reduction_map, (splay_tree_key) id); + return (tree) n->value; +} + +static inline tree +maybe_lookup_oacc_reduction (tree var, omp_context *ctx) +{ + splay_tree_node n = NULL; + if (ctx->reduction_map) + n = splay_tree_lookup (ctx->reduction_map, (splay_tree_key) var); + return n ? (tree) n->value : NULL_TREE; +} + /* Return true if DECL should be copied by pointer. SHARED_CTX is the parallel context if DECL is to be shared. */ @@ -915,6 +1049,8 @@ use_pointer_for_field (tree decl, omp_context *shared_ctx) when we know the value is not accessible from an outer scope. */ if (shared_ctx) { + gcc_assert (!is_gimple_omp_oacc (shared_ctx->stmt)); + /* ??? Trivially accessible from anywhere. But why would we even be passing an address in this case? Should we simply assert this to be false, or should we have a cleanup pass that removes @@ -1119,6 +1255,8 @@ install_var_field (tree var, bool by_ref, int mask, omp_context *ctx) || !splay_tree_lookup (ctx->field_map, (splay_tree_key) var)); gcc_assert ((mask & 2) == 0 || !ctx->sfield_map || !splay_tree_lookup (ctx->sfield_map, (splay_tree_key) var)); + gcc_assert ((mask & 3) == 3 + || !is_gimple_omp_oacc (ctx->stmt)); type = TREE_TYPE (var); if (mask & 4) @@ -1395,6 +1533,7 @@ new_omp_context (gimple stmt, omp_context *outer_ctx) ctx->cb = outer_ctx->cb; ctx->cb.block = NULL; ctx->depth = outer_ctx->depth + 1; + ctx->reduction_map = outer_ctx->reduction_map; } else { @@ -1465,6 +1604,11 @@ delete_omp_context (splay_tree_value value) splay_tree_delete (ctx->field_map); if (ctx->sfield_map) splay_tree_delete (ctx->sfield_map); + if (ctx->reduction_map + /* Shared over several omp_contexts. */ + && (ctx->outer == NULL + || ctx->reduction_map != ctx->outer->reduction_map)) + splay_tree_delete (ctx->reduction_map); /* We hijacked DECL_ABSTRACT_ORIGIN earlier. 
We need to clear it before it produces corrupt debug information. */ @@ -1603,6 +1747,12 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) /* FALLTHRU */ case OMP_CLAUSE_FIRSTPRIVATE: + if (is_gimple_omp_oacc (ctx->stmt)) + { + sorry ("clause not supported yet"); + break; + } + /* FALLTHRU */ case OMP_CLAUSE_REDUCTION: case OMP_CLAUSE_LINEAR: decl = OMP_CLAUSE_DECL (c); @@ -1630,6 +1780,27 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) install_var_field (decl, by_ref, 3, ctx); } install_var_local (decl, ctx); + if (is_gimple_omp_oacc (ctx->stmt) + && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION) + { + /* Create a decl for the reduction array. */ + tree var = OMP_CLAUSE_DECL (c); + tree type = get_base_type (var); + tree ptype = build_pointer_type (type); + tree array = create_tmp_var (ptype, + oacc_get_reduction_array_id (var)); + omp_context *c = (ctx->field_map ? ctx : ctx->outer); + install_var_field (array, true, 3, c); + install_var_local (array, c); + + /* Insert it into the current context. 
*/ + splay_tree_insert (ctx->reduction_map, (splay_tree_key) + oacc_get_reduction_array_id (var), + (splay_tree_value) array); + splay_tree_insert (ctx->reduction_map, + (splay_tree_key) array, + (splay_tree_value) array); + } break; case OMP_CLAUSE__LOOPTEMP_: @@ -1660,6 +1831,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_DIST_SCHEDULE: case OMP_CLAUSE_DEPEND: case OMP_CLAUSE__CILK_FOR_COUNT_: + case OMP_CLAUSE_NUM_GANGS: + case OMP_CLAUSE_NUM_WORKERS: + case OMP_CLAUSE_VECTOR_LENGTH: if (ctx->outer) scan_omp_op (&OMP_CLAUSE_OPERAND (c, 0), ctx->outer); break; @@ -1679,12 +1853,11 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) && varpool_node::get_create (decl)->offloadable) break; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER) + && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER) { - /* Ignore OMP_CLAUSE_MAP_POINTER kind for arrays in - #pragma omp target data, there is nothing to map for - those. */ - if (gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_DATA + /* Ignore GOMP_MAP_POINTER kind for arrays in regions that are + not offloaded; there is nothing to map for those. 
*/ + if (!is_gimple_omp_offloaded (ctx->stmt) && !POINTER_TYPE_P (TREE_TYPE (decl))) break; } @@ -1704,14 +1877,13 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) else { if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER + && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE) install_var_field (decl, true, 7, ctx); else install_var_field (decl, true, 3, ctx); - if (gimple_omp_target_kind (ctx->stmt) - == GF_OMP_TARGET_KIND_REGION) + if (is_gimple_omp_offloaded (ctx->stmt)) install_var_local (decl, ctx); } } @@ -1723,7 +1895,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) && nc != NULL_TREE && OMP_CLAUSE_CODE (nc) == OMP_CLAUSE_MAP && OMP_CLAUSE_DECL (nc) == base - && OMP_CLAUSE_MAP_KIND (nc) == OMP_CLAUSE_MAP_POINTER + && OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_POINTER && integer_zerop (OMP_CLAUSE_SIZE (nc))) { OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) = 1; @@ -1756,6 +1928,11 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_MERGEABLE: case OMP_CLAUSE_PROC_BIND: case OMP_CLAUSE_SAFELEN: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: + case OMP_CLAUSE_GANG: + case OMP_CLAUSE_WORKER: + case OMP_CLAUSE_VECTOR: break; case OMP_CLAUSE_ALIGNED: @@ -1765,6 +1942,15 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) install_var_local (decl, ctx); break; + case OMP_CLAUSE_DEVICE_RESIDENT: + case OMP_CLAUSE_USE_DEVICE: + case OMP_CLAUSE__CACHE_: + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_AUTO: + case OMP_CLAUSE_SEQ: + sorry ("Clause not supported yet"); + break; + default: gcc_unreachable (); } @@ -1783,8 +1969,14 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) break; /* FALLTHRU */ - case OMP_CLAUSE_PRIVATE: case OMP_CLAUSE_FIRSTPRIVATE: + if (is_gimple_omp_oacc (ctx->stmt)) + { + sorry ("clause not supported yet"); + break; + } + /* FALLTHRU */ + case OMP_CLAUSE_PRIVATE: case 
OMP_CLAUSE_REDUCTION: case OMP_CLAUSE_LINEAR: decl = OMP_CLAUSE_DECL (c); @@ -1811,7 +2003,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) break; case OMP_CLAUSE_MAP: - if (gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_DATA) + if (!is_gimple_omp_offloaded (ctx->stmt)) break; decl = OMP_CLAUSE_DECL (c); if (DECL_P (decl) @@ -1820,7 +2012,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) break; if (DECL_P (decl)) { - if (OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER + if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE && !COMPLETE_TYPE_P (TREE_TYPE (decl))) { @@ -1867,6 +2059,23 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: case OMP_CLAUSE__CILK_FOR_COUNT_: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: + case OMP_CLAUSE_NUM_GANGS: + case OMP_CLAUSE_NUM_WORKERS: + case OMP_CLAUSE_VECTOR_LENGTH: + case OMP_CLAUSE_GANG: + case OMP_CLAUSE_WORKER: + case OMP_CLAUSE_VECTOR: + break; + + case OMP_CLAUSE_DEVICE_RESIDENT: + case OMP_CLAUSE_USE_DEVICE: + case OMP_CLAUSE__CACHE_: + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_AUTO: + case OMP_CLAUSE_SEQ: + sorry ("Clause not supported yet"); break; default: @@ -1874,6 +2083,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) } } + gcc_checking_assert (!scan_array_reductions + || !is_gimple_omp_oacc (ctx->stmt)); if (scan_array_reductions) for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION @@ -1956,6 +2167,8 @@ create_omp_child_function (omp_context *ctx, bool task_copy) decl = build_decl (gimple_location (ctx->stmt), FUNCTION_DECL, name, type); + gcc_checking_assert (!is_gimple_omp_oacc (ctx->stmt) + || !task_copy); if (!task_copy) ctx->cb.dst_fn = decl; else @@ -1976,7 +2189,7 @@ create_omp_child_function (omp_context *ctx, bool task_copy) { omp_context *octx; for (octx = ctx; octx; octx = octx->outer) - if (is_targetreg_ctx (octx)) + if 
(is_gimple_omp_offloaded (octx->stmt)) { cgraph_node::get_create (decl)->offloadable = 1; #ifdef ENABLE_OFFLOADING @@ -2311,17 +2524,84 @@ finish_taskreg_scan (omp_context *ctx) } -/* Scan an OpenMP loop directive. */ +static omp_context * +enclosing_target_ctx (omp_context *ctx) +{ + while (ctx != NULL + && gimple_code (ctx->stmt) != GIMPLE_OMP_TARGET) + ctx = ctx->outer; + gcc_assert (ctx != NULL); + return ctx; +} + +static bool +oacc_loop_or_target_p (gimple stmt) +{ + enum gimple_code outer_type = gimple_code (stmt); + return ((outer_type == GIMPLE_OMP_TARGET + && ((gimple_omp_target_kind (stmt) + == GF_OMP_TARGET_KIND_OACC_PARALLEL) + || (gimple_omp_target_kind (stmt) + == GF_OMP_TARGET_KIND_OACC_KERNELS))) + || (outer_type == GIMPLE_OMP_FOR + && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_OACC_LOOP)); +} + +/* Scan a GIMPLE_OMP_FOR. */ static void scan_omp_for (gomp_for *stmt, omp_context *outer_ctx) { + enum gimple_code outer_type = GIMPLE_ERROR_MARK; omp_context *ctx; size_t i; + tree clauses = gimple_omp_for_clauses (stmt); + + if (outer_ctx) + outer_type = gimple_code (outer_ctx->stmt); ctx = new_omp_context (stmt, outer_ctx); - scan_sharing_clauses (gimple_omp_for_clauses (stmt), ctx); + if (is_gimple_omp_oacc (stmt)) + { + if (outer_ctx && outer_type == GIMPLE_OMP_FOR) + ctx->gwv_this = outer_ctx->gwv_this; + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + { + int val; + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_GANG) + val = MASK_GANG; + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WORKER) + val = MASK_WORKER; + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR) + val = MASK_VECTOR; + else + continue; + ctx->gwv_this |= val; + if (!outer_ctx) + { + /* Skip; not nested inside a region. */ + continue; + } + if (!oacc_loop_or_target_p (outer_ctx->stmt)) + { + /* Skip; not nested inside an OpenACC region. 
*/ + continue; + } + if (outer_type == GIMPLE_OMP_FOR) + outer_ctx->gwv_below |= val; + if (OMP_CLAUSE_OPERAND (c, 0) != NULL_TREE) + { + omp_context *enclosing = enclosing_target_ctx (outer_ctx); + if (gimple_omp_target_kind (enclosing->stmt) + == GF_OMP_TARGET_KIND_OACC_PARALLEL) + error_at (gimple_location (stmt), + "no arguments allowed to gang, worker and vector clauses inside parallel"); + } + } + } + + scan_sharing_clauses (clauses, ctx); scan_omp (gimple_omp_for_pre_body_ptr (stmt), ctx); for (i = 0; i < gimple_omp_for_collapse (stmt); i++) @@ -2332,6 +2612,19 @@ scan_omp_for (gomp_for *stmt, omp_context *outer_ctx) scan_omp_op (gimple_omp_for_incr_ptr (stmt, i), ctx); } scan_omp (gimple_omp_body_ptr (stmt), ctx); + + if (is_gimple_omp_oacc (stmt)) + { + if (ctx->gwv_this & ctx->gwv_below) + error_at (gimple_location (stmt), + "gang, worker and vector may occur only once in a loop nest"); + else if (ctx->gwv_below != 0 + && ctx->gwv_this > ctx->gwv_below) + error_at (gimple_location (stmt), + "gang, worker and vector must occur in this order in a loop nest"); + if (outer_ctx && outer_type == GIMPLE_OMP_FOR) + outer_ctx->gwv_below |= ctx->gwv_below; + } } /* Scan an OpenMP sections directive. */ @@ -2371,14 +2664,15 @@ scan_omp_single (gomp_single *stmt, omp_context *outer_ctx) layout_type (ctx->record_type); } -/* Scan an OpenMP target{, data, update} directive. */ +/* Scan a GIMPLE_OMP_TARGET. 
*/ static void scan_omp_target (gomp_target *stmt, omp_context *outer_ctx) { omp_context *ctx; tree name; - int kind = gimple_omp_target_kind (stmt); + bool offloaded = is_gimple_omp_offloaded (stmt); + tree clauses = gimple_omp_target_clauses (stmt); ctx = new_omp_context (stmt, outer_ctx); ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0); @@ -2390,13 +2684,30 @@ scan_omp_target (gomp_target *stmt, omp_context *outer_ctx) DECL_ARTIFICIAL (name) = 1; DECL_NAMELESS (name) = 1; TYPE_NAME (ctx->record_type) = name; - if (kind == GF_OMP_TARGET_KIND_REGION) + if (offloaded) { + if (is_gimple_omp_oacc (stmt)) + ctx->reduction_map = splay_tree_new (splay_tree_compare_pointers, + 0, 0); + create_omp_child_function (ctx, false); gimple_omp_target_set_child_fn (stmt, ctx->cb.dst_fn); } - scan_sharing_clauses (gimple_omp_target_clauses (stmt), ctx); + if (is_gimple_omp_oacc (stmt)) + { + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + { + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_GANGS) + ctx->gwv_this |= MASK_GANG; + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_WORKERS) + ctx->gwv_this |= MASK_WORKER; + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR_LENGTH) + ctx->gwv_this |= MASK_VECTOR; + } + } + + scan_sharing_clauses (clauses, ctx); scan_omp (gimple_omp_body_ptr (stmt), ctx); if (TYPE_FIELDS (ctx->record_type) == NULL) @@ -2414,7 +2725,7 @@ scan_omp_target (gomp_target *stmt, omp_context *outer_ctx) gcc_assert (DECL_ALIGN (field) == align); #endif layout_type (ctx->record_type); - if (kind == GF_OMP_TARGET_KIND_REGION) + if (offloaded) fixup_child_record_type (ctx); } } @@ -2429,10 +2740,25 @@ scan_omp_teams (gomp_teams *stmt, omp_context *outer_ctx) scan_omp (gimple_omp_body_ptr (stmt), ctx); } -/* Check OpenMP nesting restrictions. */ +/* Check nesting restrictions. 
*/ static bool check_omp_nesting_restrictions (gimple stmt, omp_context *ctx) { + /* No nesting of non-OpenACC STMT (that is, an OpenMP one, or a GOMP builtin) + inside an OpenACC CTX. */ + if (!(is_gimple_omp (stmt) + && is_gimple_omp_oacc (stmt))) + { + for (omp_context *ctx_ = ctx; ctx_ != NULL; ctx_ = ctx_->outer) + if (is_gimple_omp (ctx_->stmt) + && is_gimple_omp_oacc (ctx_->stmt)) + { + error_at (gimple_location (stmt), + "non-OpenACC construct inside of OpenACC region"); + return false; + } + } + if (ctx != NULL) { if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR @@ -2693,19 +3019,74 @@ check_omp_nesting_restrictions (gimple stmt, omp_context *ctx) break; case GIMPLE_OMP_TARGET: for (; ctx != NULL; ctx = ctx->outer) - if (is_targetreg_ctx (ctx)) - { - const char *name; - switch (gimple_omp_target_kind (stmt)) - { - case GF_OMP_TARGET_KIND_REGION: name = "target"; break; - case GF_OMP_TARGET_KIND_DATA: name = "target data"; break; - case GF_OMP_TARGET_KIND_UPDATE: name = "target update"; break; - default: gcc_unreachable (); - } - warning_at (gimple_location (stmt), 0, - "%s construct inside of target region", name); - } + { + if (gimple_code (ctx->stmt) != GIMPLE_OMP_TARGET) + { + if (is_gimple_omp (stmt) + && is_gimple_omp_oacc (stmt) + && is_gimple_omp (ctx->stmt)) + { + error_at (gimple_location (stmt), + "OpenACC construct inside of non-OpenACC region"); + return false; + } + continue; + } + + const char *stmt_name, *ctx_stmt_name; + switch (gimple_omp_target_kind (stmt)) + { + case GF_OMP_TARGET_KIND_REGION: stmt_name = "target"; break; + case GF_OMP_TARGET_KIND_DATA: stmt_name = "target data"; break; + case GF_OMP_TARGET_KIND_UPDATE: stmt_name = "target update"; break; + case GF_OMP_TARGET_KIND_OACC_PARALLEL: stmt_name = "parallel"; break; + case GF_OMP_TARGET_KIND_OACC_KERNELS: stmt_name = "kernels"; break; + case GF_OMP_TARGET_KIND_OACC_DATA: stmt_name = "data"; break; + case GF_OMP_TARGET_KIND_OACC_UPDATE: stmt_name = "update"; break; + case 
GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: stmt_name = "enter/exit data"; break; + default: gcc_unreachable (); + } + switch (gimple_omp_target_kind (ctx->stmt)) + { + case GF_OMP_TARGET_KIND_REGION: ctx_stmt_name = "target"; break; + case GF_OMP_TARGET_KIND_DATA: ctx_stmt_name = "target data"; break; + case GF_OMP_TARGET_KIND_OACC_PARALLEL: ctx_stmt_name = "parallel"; break; + case GF_OMP_TARGET_KIND_OACC_KERNELS: ctx_stmt_name = "kernels"; break; + case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break; + default: gcc_unreachable (); + } + + /* OpenACC/OpenMP mismatch? */ + if (is_gimple_omp_oacc (stmt) + != is_gimple_omp_oacc (ctx->stmt)) + { + error_at (gimple_location (stmt), + "%s %s construct inside of %s %s region", + (is_gimple_omp_oacc (stmt) + ? "OpenACC" : "OpenMP"), stmt_name, + (is_gimple_omp_oacc (ctx->stmt) + ? "OpenACC" : "OpenMP"), ctx_stmt_name); + return false; + } + if (is_gimple_omp_offloaded (ctx->stmt)) + { + /* No GIMPLE_OMP_TARGET inside offloaded OpenACC CTX. */ + if (is_gimple_omp_oacc (ctx->stmt)) + { + error_at (gimple_location (stmt), + "%s construct inside of %s region", + stmt_name, ctx_stmt_name); + return false; + } + else + { + gcc_checking_assert (!is_gimple_omp_oacc (stmt)); + warning_at (gimple_location (stmt), 0, + "%s construct inside of %s region", + stmt_name, ctx_stmt_name); + } + } + } break; default: break; @@ -2717,7 +3098,7 @@ check_omp_nesting_restrictions (gimple stmt, omp_context *ctx) /* Helper function scan_omp. Callback for walk_tree or operators in walk_gimple_stmt used to - scan for OpenMP directives in TP. */ + scan for OMP directives in TP. */ static tree scan_omp_1_op (tree *tp, int *walk_subtrees, void *data) @@ -2780,7 +3161,7 @@ setjmp_or_longjmp_p (const_tree fndecl) /* Helper function for scan_omp. - Callback for walk_gimple_stmt used to scan for OpenMP directives in + Callback for walk_gimple_stmt used to scan for OMP directives in the current statement in GSI. 
*/ static tree @@ -2793,7 +3174,7 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, if (gimple_has_location (stmt)) input_location = gimple_location (stmt); - /* Check the OpenMP nesting restrictions. */ + /* Check the nesting restrictions. */ bool remove = false; if (is_gimple_omp (stmt)) remove = !check_omp_nesting_restrictions (stmt, ctx); @@ -2901,7 +3282,7 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, /* Scan all the statements starting at the current statement. CTX - contains context information about the OpenMP directives and + contains context information about the OMP directives and clauses found during the scan. */ static void @@ -4086,6 +4467,57 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list, gimple_seq_add_stmt (stmt_list, gimple_build_label (label)); } +static void +oacc_lower_reduction_var_helper (gimple_seq *stmt_seqp, omp_context *ctx, + tree tid, tree var, tree new_var) +{ + /* The atomic add at the end of the sum creates unnecessary + write contention on accelerators. To work around this, + create an array to store the partial reductions. Later, in + lower_omp_for (for openacc), the values of array will be + combined. */ + + tree t = NULL_TREE, array, x; + tree type = get_base_type (var); + gimple stmt; + + /* Now insert the partial reductions into the array. */ + + /* Find the reduction array. */ + + tree ptype = build_pointer_type (type); + + t = lookup_oacc_reduction (oacc_get_reduction_array_id (var), ctx); + t = build_receiver_ref (t, false, ctx->outer); + + array = create_tmp_var (ptype); + gimplify_assign (array, t, stmt_seqp); + + tree ptr = create_tmp_var (TREE_TYPE (array)); + + /* Find the reduction array. */ + + /* testing a unary conversion. 
*/ + tree offset = create_tmp_var (sizetype); + gimplify_assign (offset, TYPE_SIZE_UNIT (type), + stmt_seqp); + t = create_tmp_var (sizetype); + gimplify_assign (t, unshare_expr (fold_build1 (NOP_EXPR, sizetype, tid)), + stmt_seqp); + stmt = gimple_build_assign (offset, MULT_EXPR, offset, t); + gimple_seq_add_stmt (stmt_seqp, stmt); + + /* Offset expression. Does the POINTER_PLUS_EXPR take care + of adding sizeof(var) to the array? */ + ptr = create_tmp_var (ptype); + stmt = gimple_build_assign (unshare_expr (ptr), POINTER_PLUS_EXPR, array, + offset); + gimple_seq_add_stmt (stmt_seqp, stmt); + + /* Move the local sum to gfc$sum[i]. */ + x = unshare_expr (build_simple_mem_ref (ptr)); + stmt = gimplify_assign (x, new_var, stmt_seqp); +} /* Generate code to implement the REDUCTION clauses. */ @@ -4094,7 +4526,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) { gimple_seq sub_seq = NULL; gimple stmt; - tree x, c; + tree x, c, tid = NULL_TREE; int count = 0; /* SIMD reductions are handled in lower_rec_input_clauses. */ @@ -4119,6 +4551,17 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) if (count == 0) return; + /* Initialize thread info for OpenACC. */ + if (is_gimple_omp_oacc (ctx->stmt)) + { + /* Get the current thread id. 
*/ + tree call = builtin_decl_explicit (BUILT_IN_GOACC_GET_THREAD_NUM); + tid = create_tmp_var (TREE_TYPE (TREE_TYPE (call))); + gimple stmt = gimple_build_call (call, 0); + gimple_call_set_lhs (stmt, tid); + gimple_seq_add_stmt (stmt_seqp, stmt); + } + for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c)) { tree var, ref, new_var; @@ -4140,7 +4583,13 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) if (code == MINUS_EXPR) code = PLUS_EXPR; - if (count == 1) + if (is_gimple_omp_oacc (ctx->stmt)) + { + gcc_checking_assert (!OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)); + + oacc_lower_reduction_var_helper (stmt_seqp, ctx, tid, var, new_var); + } + else if (count == 1) { tree addr = build_fold_addr_expr_loc (clause_loc, ref); @@ -4151,8 +4600,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) gimplify_and_add (x, stmt_seqp); return; } - - if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) + else if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) { tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c); @@ -4175,6 +4623,9 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) } } + if (is_gimple_omp_oacc (ctx->stmt)) + return; + stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START), 0); gimple_seq_add_stmt (stmt_seqp, stmt); @@ -6065,12 +6516,14 @@ expand_omp_for_static_nochunk (struct omp_region *region, basic_block fin_bb; gimple_stmt_iterator gsi; edge ep; - enum built_in_function get_num_threads = BUILT_IN_OMP_GET_NUM_THREADS; - enum built_in_function get_thread_num = BUILT_IN_OMP_GET_THREAD_NUM; bool broken_loop = region->cont == NULL; tree *counts = NULL; tree n1, n2, step; + gcc_checking_assert ((gimple_omp_for_kind (fd->for_stmt) + != GF_OMP_FOR_KIND_OACC_LOOP) + || !inner_stmt); + itype = type = TREE_TYPE (fd->loop.v); if (POINTER_TYPE_P (type)) itype = signed_type_for (type); @@ -6094,12 +6547,6 @@ expand_omp_for_static_nochunk (struct omp_region *region, gsi = 
gsi_last_bb (entry_bb); gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) - { - get_num_threads = BUILT_IN_OMP_GET_NUM_TEAMS; - get_thread_num = BUILT_IN_OMP_GET_TEAM_NUM; - } - if (fd->collapse > 1) { int first_zero_iter = -1; @@ -6158,14 +6605,30 @@ expand_omp_for_static_nochunk (struct omp_region *region, gsi = gsi_last_bb (entry_bb); } - t = build_call_expr (builtin_decl_explicit (get_num_threads), 0); - t = fold_convert (itype, t); - nthreads = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + switch (gimple_omp_for_kind (fd->for_stmt)) + { + case GF_OMP_FOR_KIND_FOR: + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); + break; + case GF_OMP_FOR_KIND_DISTRIBUTE: + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); + threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); + break; + case GF_OMP_FOR_KIND_OACC_LOOP: + nthreads = builtin_decl_explicit (BUILT_IN_GOACC_GET_NUM_THREADS); + threadid = builtin_decl_explicit (BUILT_IN_GOACC_GET_THREAD_NUM); + break; + default: + gcc_unreachable (); + } + nthreads = build_call_expr (nthreads, 0); + nthreads = fold_convert (itype, nthreads); + nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, true, GSI_SAME_STMT); - - t = build_call_expr (builtin_decl_explicit (get_thread_num), 0); - t = fold_convert (itype, t); - threadid = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + threadid = build_call_expr (threadid, 0); + threadid = fold_convert (itype, threadid); + threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, true, GSI_SAME_STMT); n1 = fd->loop.n1; @@ -6339,7 +6802,10 @@ expand_omp_for_static_nochunk (struct omp_region *region, if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) { t = gimple_omp_return_lhs (gsi_stmt (gsi)); - gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT); + if 
(gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP) + gcc_checking_assert (t == NULL_TREE); + else + gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT); } gsi_remove (&gsi, true); @@ -6440,12 +6906,14 @@ expand_omp_for_static_chunk (struct omp_region *region, basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb; gimple_stmt_iterator gsi; edge se; - enum built_in_function get_num_threads = BUILT_IN_OMP_GET_NUM_THREADS; - enum built_in_function get_thread_num = BUILT_IN_OMP_GET_THREAD_NUM; bool broken_loop = region->cont == NULL; tree *counts = NULL; tree n1, n2, step; + gcc_checking_assert ((gimple_omp_for_kind (fd->for_stmt) + != GF_OMP_FOR_KIND_OACC_LOOP) + || !inner_stmt); + itype = type = TREE_TYPE (fd->loop.v); if (POINTER_TYPE_P (type)) itype = signed_type_for (type); @@ -6473,12 +6941,6 @@ expand_omp_for_static_chunk (struct omp_region *region, gsi = gsi_last_bb (entry_bb); gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) - { - get_num_threads = BUILT_IN_OMP_GET_NUM_TEAMS; - get_thread_num = BUILT_IN_OMP_GET_TEAM_NUM; - } - if (fd->collapse > 1) { int first_zero_iter = -1; @@ -6537,14 +6999,30 @@ expand_omp_for_static_chunk (struct omp_region *region, gsi = gsi_last_bb (entry_bb); } - t = build_call_expr (builtin_decl_explicit (get_num_threads), 0); - t = fold_convert (itype, t); - nthreads = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + switch (gimple_omp_for_kind (fd->for_stmt)) + { + case GF_OMP_FOR_KIND_FOR: + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); + break; + case GF_OMP_FOR_KIND_DISTRIBUTE: + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); + threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); + break; + case GF_OMP_FOR_KIND_OACC_LOOP: + nthreads = builtin_decl_explicit (BUILT_IN_GOACC_GET_NUM_THREADS); + 
threadid = builtin_decl_explicit (BUILT_IN_GOACC_GET_THREAD_NUM); + break; + default: + gcc_unreachable (); + } + nthreads = build_call_expr (nthreads, 0); + nthreads = fold_convert (itype, nthreads); + nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, true, GSI_SAME_STMT); - - t = build_call_expr (builtin_decl_explicit (get_thread_num), 0); - t = fold_convert (itype, t); - threadid = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + threadid = build_call_expr (threadid, 0); + threadid = fold_convert (itype, threadid); + threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, true, GSI_SAME_STMT); n1 = fd->loop.n1; @@ -6735,7 +7213,10 @@ expand_omp_for_static_chunk (struct omp_region *region, if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) { t = gimple_omp_return_lhs (gsi_stmt (gsi)); - gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT); + if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP) + gcc_checking_assert (t == NULL_TREE); + else + gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT); } gsi_remove (&gsi, true); @@ -7350,7 +7831,7 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) } -/* Expand the OpenMP loop defined by REGION. */ +/* Expand the OMP loop defined by REGION. */ static void expand_omp_for (struct omp_region *region, gimple inner_stmt) @@ -8290,56 +8771,83 @@ expand_omp_atomic (struct omp_region *region) } -/* Expand the OpenMP target{, data, update} directive starting at REGION. */ +/* Expand the GIMPLE_OMP_TARGET starting at REGION. 
*/ static void expand_omp_target (struct omp_region *region) { basic_block entry_bb, exit_bb, new_bb; - struct function *child_cfun = NULL; - tree child_fn = NULL_TREE, block, t; + struct function *child_cfun; + tree child_fn, block, t; gimple_stmt_iterator gsi; gomp_target *entry_stmt; gimple stmt; edge e; + bool offloaded, data_region; entry_stmt = as_a (last_stmt (region->entry)); new_bb = region->entry; - int kind = gimple_omp_target_kind (entry_stmt); - if (kind == GF_OMP_TARGET_KIND_REGION) + + offloaded = is_gimple_omp_offloaded (entry_stmt); + switch (gimple_omp_target_kind (entry_stmt)) + { + case GF_OMP_TARGET_KIND_REGION: + case GF_OMP_TARGET_KIND_UPDATE: + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + case GF_OMP_TARGET_KIND_OACC_KERNELS: + case GF_OMP_TARGET_KIND_OACC_UPDATE: + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + data_region = false; + break; + case GF_OMP_TARGET_KIND_DATA: + case GF_OMP_TARGET_KIND_OACC_DATA: + data_region = true; + break; + default: + gcc_unreachable (); + } + + child_fn = NULL_TREE; + child_cfun = NULL; + if (offloaded) { child_fn = gimple_omp_target_child_fn (entry_stmt); child_cfun = DECL_STRUCT_FUNCTION (child_fn); } + /* Supported by expand_omp_taskreg, but not here. */ + if (child_cfun != NULL) + gcc_checking_assert (!child_cfun->cfg); + gcc_checking_assert (!gimple_in_ssa_p (cfun)); + entry_bb = region->entry; exit_bb = region->exit; - if (kind == GF_OMP_TARGET_KIND_REGION) + if (offloaded) { unsigned srcidx, dstidx, num; - /* If the target region needs data sent from the parent + /* If the offloading region needs data sent from the parent function, then the very first statement (except possible - tree profile counter updates) of the parallel body + tree profile counter updates) of the offloading body is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since &.OMP_DATA_O is passed as an argument to the child function, we need to replace it with the argument as seen by the child function. 
In most cases, this will end up being the identity assignment - .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had + .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had a function call that has been inlined, the original PARM_DECL .OMP_DATA_I may have been converted into a different local variable. In which case, we need to keep the assignment. */ - if (gimple_omp_target_data_arg (entry_stmt)) + tree data_arg = gimple_omp_target_data_arg (entry_stmt); + if (data_arg) { basic_block entry_succ_bb = single_succ (entry_bb); gimple_stmt_iterator gsi; tree arg; gimple tgtcopy_stmt = NULL; - tree sender - = TREE_VEC_ELT (gimple_omp_target_data_arg (entry_stmt), 0); + tree sender = TREE_VEC_ELT (data_arg, 0); for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi)) { @@ -8374,7 +8882,7 @@ expand_omp_target (struct omp_region *region) /* Declare local variables needed in CHILD_CFUN. */ block = DECL_INITIAL (child_fn); BLOCK_VARS (block) = vec2chain (child_cfun->local_decls); - /* The gimplifier could record temporaries in target block + /* The gimplifier could record temporaries in the offloading block rather than in containing function's local_decls chain, which would mean cgraph missed finalizing them. Do it now. */ for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) @@ -8391,13 +8899,12 @@ expand_omp_target (struct omp_region *region) for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t)) DECL_CONTEXT (t) = child_fn; - /* Split ENTRY_BB at GIMPLE_OMP_TARGET, + /* Split ENTRY_BB at GIMPLE_*, so that it can be moved to the child function. 
*/ gsi = gsi_last_bb (entry_bb); stmt = gsi_stmt (gsi); - gcc_assert (stmt && gimple_code (stmt) == GIMPLE_OMP_TARGET - && gimple_omp_target_kind (stmt) - == GF_OMP_TARGET_KIND_REGION); + gcc_assert (stmt + && gimple_code (stmt) == gimple_code (entry_stmt)); gsi_remove (&gsi, true); e = split_block (entry_bb, stmt); entry_bb = e->dest; @@ -8414,7 +8921,7 @@ expand_omp_target (struct omp_region *region) gsi_remove (&gsi, true); } - /* Move the target region into CHILD_CFUN. */ + /* Move the offloading region into CHILD_CFUN. */ block = gimple_block (entry_stmt); @@ -8478,25 +8985,46 @@ expand_omp_target (struct omp_region *region) pop_cfun (); } - /* Emit a library call to launch the target region, or do data + /* Emit a library call to launch the offloading region, or do data transfers. */ tree t1, t2, t3, t4, device, cond, c, clauses; enum built_in_function start_ix; location_t clause_loc; + switch (gimple_omp_target_kind (entry_stmt)) + { + case GF_OMP_TARGET_KIND_REGION: + start_ix = BUILT_IN_GOMP_TARGET; + break; + case GF_OMP_TARGET_KIND_DATA: + start_ix = BUILT_IN_GOMP_TARGET_DATA; + break; + case GF_OMP_TARGET_KIND_UPDATE: + start_ix = BUILT_IN_GOMP_TARGET_UPDATE; + break; + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + case GF_OMP_TARGET_KIND_OACC_KERNELS: + start_ix = BUILT_IN_GOACC_PARALLEL; + break; + case GF_OMP_TARGET_KIND_OACC_DATA: + start_ix = BUILT_IN_GOACC_DATA_START; + break; + case GF_OMP_TARGET_KIND_OACC_UPDATE: + start_ix = BUILT_IN_GOACC_UPDATE; + break; + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA; + break; + default: + gcc_unreachable (); + } + clauses = gimple_omp_target_clauses (entry_stmt); - if (kind == GF_OMP_TARGET_KIND_REGION) - start_ix = BUILT_IN_GOMP_TARGET; - else if (kind == GF_OMP_TARGET_KIND_DATA) - start_ix = BUILT_IN_GOMP_TARGET_DATA; - else - start_ix = BUILT_IN_GOMP_TARGET_UPDATE; - - /* By default, the value of DEVICE is -1 (let runtime library choose) - and there is no 
conditional. */ + /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime + library choose) and there is no conditional. */ cond = NULL_TREE; - device = build_int_cst (integer_type_node, -1); + device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV); c = find_omp_clause (clauses, OMP_CLAUSE_IF); if (c) @@ -8505,6 +9033,12 @@ expand_omp_target (struct omp_region *region) c = find_omp_clause (clauses, OMP_CLAUSE_DEVICE); if (c) { + /* Even if we pass it to all library function calls, it is currently only + defined/used for the OpenMP target ones. */ + gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET + || start_ix == BUILT_IN_GOMP_TARGET_DATA + || start_ix == BUILT_IN_GOMP_TARGET_UPDATE); + device = OMP_CLAUSE_DEVICE_ID (c); clause_loc = OMP_CLAUSE_LOCATION (c); } @@ -8515,7 +9049,7 @@ expand_omp_target (struct omp_region *region) device = fold_convert_loc (clause_loc, integer_type_node, device); /* If we found the clause 'if (cond)', build - (cond ? device : -2). */ + (cond ? device : GOMP_DEVICE_HOST_FALLBACK). 
*/ if (cond) { cond = gimple_boolify (cond); @@ -8525,14 +9059,14 @@ expand_omp_target (struct omp_region *region) tree tmp_var; tmp_var = create_tmp_var (TREE_TYPE (device)); - if (kind != GF_OMP_TARGET_KIND_REGION) + if (offloaded) + e = split_block (new_bb, NULL); + else { gsi = gsi_last_bb (new_bb); gsi_prev (&gsi); e = split_block (new_bb, gsi_stmt (gsi)); } - else - e = split_block (new_bb, NULL); cond_bb = e->src; new_bb = e->dest; remove_edge (e); @@ -8552,7 +9086,8 @@ expand_omp_target (struct omp_region *region) gsi = gsi_start_bb (else_bb); stmt = gimple_build_assign (tmp_var, - build_int_cst (integer_type_node, -2)); + build_int_cst (integer_type_node, + GOMP_DEVICE_HOST_FALLBACK)); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); @@ -8584,28 +9119,124 @@ expand_omp_target (struct omp_region *region) } gimple g; - /* FIXME: This will be address of - extern char __OPENMP_TARGET__[] __attribute__((visibility ("hidden"))) - symbol, as soon as the linker plugin is able to create it for us. */ - tree openmp_target = build_zero_cst (ptr_type_node); - if (kind == GF_OMP_TARGET_KIND_REGION) + tree offload_table = get_offload_symbol_decl (); + vec *args; + /* The maximum number used by any start_ix, without varargs. 
*/ + unsigned int argcnt = 12; + + vec_alloc (args, argcnt); + args->quick_push (device); + if (offloaded) + args->quick_push (build_fold_addr_expr (child_fn)); + args->quick_push (build_fold_addr_expr (offload_table)); + args->quick_push (t1); + args->quick_push (t2); + args->quick_push (t3); + args->quick_push (t4); + switch (start_ix) { - tree fnaddr = build_fold_addr_expr (child_fn); - g = gimple_build_call (builtin_decl_explicit (start_ix), 7, - device, fnaddr, openmp_target, t1, t2, t3, t4); + case BUILT_IN_GOACC_DATA_START: + case BUILT_IN_GOMP_TARGET: + case BUILT_IN_GOMP_TARGET_DATA: + case BUILT_IN_GOMP_TARGET_UPDATE: + break; + case BUILT_IN_GOACC_PARALLEL: + { + tree t_num_gangs, t_num_workers, t_vector_length; + + /* Default values for num_gangs, num_workers, and vector_length. */ + t_num_gangs = t_num_workers = t_vector_length + = fold_convert_loc (gimple_location (entry_stmt), + integer_type_node, integer_one_node); + /* ..., but if present, use the value specified by the respective + clause, making sure that are of the correct type. */ + c = find_omp_clause (clauses, OMP_CLAUSE_NUM_GANGS); + if (c) + t_num_gangs = fold_convert_loc (OMP_CLAUSE_LOCATION (c), + integer_type_node, + OMP_CLAUSE_NUM_GANGS_EXPR (c)); + c = find_omp_clause (clauses, OMP_CLAUSE_NUM_WORKERS); + if (c) + t_num_workers = fold_convert_loc (OMP_CLAUSE_LOCATION (c), + integer_type_node, + OMP_CLAUSE_NUM_WORKERS_EXPR (c)); + c = find_omp_clause (clauses, OMP_CLAUSE_VECTOR_LENGTH); + if (c) + t_vector_length = fold_convert_loc (OMP_CLAUSE_LOCATION (c), + integer_type_node, + OMP_CLAUSE_VECTOR_LENGTH_EXPR (c)); + args->quick_push (t_num_gangs); + args->quick_push (t_num_workers); + args->quick_push (t_vector_length); + } + /* FALLTHRU */ + case BUILT_IN_GOACC_ENTER_EXIT_DATA: + case BUILT_IN_GOACC_UPDATE: + { + tree t_async; + int t_wait_idx; + + /* Default values for t_async. 
*/ + t_async = fold_convert_loc (gimple_location (entry_stmt), + integer_type_node, + build_int_cst (integer_type_node, + GOMP_ASYNC_SYNC)); + /* ..., but if present, use the value specified by the respective + clause, making sure that is of the correct type. */ + c = find_omp_clause (clauses, OMP_CLAUSE_ASYNC); + if (c) + t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c), + integer_type_node, + OMP_CLAUSE_ASYNC_EXPR (c)); + + args->quick_push (t_async); + /* Save the index, and... */ + t_wait_idx = args->length (); + /* ... push a default value. */ + args->quick_push (fold_convert_loc (gimple_location (entry_stmt), + integer_type_node, + integer_zero_node)); + c = find_omp_clause (clauses, OMP_CLAUSE_WAIT); + if (c) + { + int n = 0; + + for (; c; c = OMP_CLAUSE_CHAIN (c)) + { + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT) + { + args->safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c), + integer_type_node, + OMP_CLAUSE_WAIT_EXPR (c))); + n++; + } + } + + /* Now that we know the number, replace the default value. 
*/ + args->ordered_remove (t_wait_idx); + args->quick_insert (t_wait_idx, + fold_convert_loc (gimple_location (entry_stmt), + integer_type_node, + build_int_cst (integer_type_node, n))); + } + } + break; + default: + gcc_unreachable (); } - else - g = gimple_build_call (builtin_decl_explicit (start_ix), 6, - device, openmp_target, t1, t2, t3, t4); + + g = gimple_build_call_vec (builtin_decl_explicit (start_ix), *args); + args->release (); gimple_set_location (g, gimple_location (entry_stmt)); gsi_insert_before (&gsi, g, GSI_SAME_STMT); - if (kind != GF_OMP_TARGET_KIND_REGION) + if (!offloaded) { g = gsi_stmt (gsi); gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET); gsi_remove (&gsi, true); } - if (kind == GF_OMP_TARGET_KIND_DATA && region->exit) + if (data_region + && region->exit) { gsi = gsi_last_bb (region->exit); g = gsi_stmt (gsi); @@ -8737,7 +9368,6 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent, region->exit = bb; parent = parent->outer; } - else if (code == GIMPLE_OMP_CONTINUE) { gcc_assert (parent); @@ -8747,17 +9377,34 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent, { /* GIMPLE_OMP_SECTIONS_SWITCH is part of GIMPLE_OMP_SECTIONS, and we do nothing for it. */ - ; } - else if (code == GIMPLE_OMP_TARGET - && gimple_omp_target_kind (stmt) == GF_OMP_TARGET_KIND_UPDATE) - new_omp_region (bb, code, parent); else { - /* Otherwise, this directive becomes the parent for a new - region. */ region = new_omp_region (bb, code, parent); - parent = region; + /* Otherwise... */ + if (code == GIMPLE_OMP_TARGET) + { + switch (gimple_omp_target_kind (stmt)) + { + case GF_OMP_TARGET_KIND_REGION: + case GF_OMP_TARGET_KIND_DATA: + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + case GF_OMP_TARGET_KIND_OACC_KERNELS: + case GF_OMP_TARGET_KIND_OACC_DATA: + break; + case GF_OMP_TARGET_KIND_UPDATE: + case GF_OMP_TARGET_KIND_OACC_UPDATE: + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + /* ..., other than for those stand-alone directives... 
*/ + region = NULL; + break; + default: + gcc_unreachable (); + } + } + /* ..., this directive becomes the parent for a new region. */ + if (region) + parent = region; } } @@ -8866,11 +9513,12 @@ public: /* opt_pass methods: */ virtual unsigned int execute (function *) { - bool gate = ((flag_openmp != 0 || flag_openmp_simd != 0 - || flag_cilkplus != 0) && !seen_error ()); + bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0 + || flag_openmp_simd != 0) + && !seen_error ()); /* This pass always runs, to provide PROP_gimple_eomp. - But there is nothing to do unless -fopenmp is given. */ + But often, there is nothing to do. */ if (!gate) return 0; @@ -8926,7 +9574,394 @@ make_pass_expand_omp_ssa (gcc::context *ctxt) return new pass_expand_omp_ssa (ctxt); } -/* Routines to lower OpenMP directives into OMP-GIMPLE. */ +/* Routines to lower OMP directives into OMP-GIMPLE. */ + +/* Helper function to perform the, potentially COMPLEX_TYPE, operation + DEST = DEST OP SRC (complex: PLUS_EXPR or MULT_EXPR only), appending gimple to SEQ. */ +static void +oacc_gimple_assign (tree dest, tree_code op, tree src, gimple_seq *seq) +{ + gimple stmt; + + if (TREE_CODE (TREE_TYPE (dest)) != COMPLEX_TYPE) + { + stmt = gimple_build_assign (dest, op, dest, src); + gimple_seq_add_stmt (seq, stmt); + return; + } + + tree t = create_tmp_var (TREE_TYPE (TREE_TYPE (dest))); + tree rdest = fold_build1 (REALPART_EXPR, TREE_TYPE (TREE_TYPE (dest)), dest); + gimplify_assign (t, rdest, seq); + rdest = t; + + t = create_tmp_var (TREE_TYPE (TREE_TYPE (dest))); + tree idest = fold_build1 (IMAGPART_EXPR, TREE_TYPE (TREE_TYPE (dest)), dest); + gimplify_assign (t, idest, seq); + idest = t; + + t = create_tmp_var (TREE_TYPE (TREE_TYPE (src))); + tree rsrc = fold_build1 (REALPART_EXPR, TREE_TYPE (TREE_TYPE (src)), src); + gimplify_assign (t, rsrc, seq); + rsrc = t; + + t = create_tmp_var (TREE_TYPE (TREE_TYPE (src))); + tree isrc = fold_build1 (IMAGPART_EXPR, TREE_TYPE (TREE_TYPE (src)), src); + gimplify_assign (t, isrc, seq); + isrc = t; + + 
tree r = create_tmp_var (TREE_TYPE (TREE_TYPE (dest))); + tree i = create_tmp_var (TREE_TYPE (TREE_TYPE (dest))); + tree result; + + if (op == PLUS_EXPR) + { + stmt = gimple_build_assign (r, op, rdest, rsrc); + gimple_seq_add_stmt (seq, stmt); + + stmt = gimple_build_assign (i, op, idest, isrc); + gimple_seq_add_stmt (seq, stmt); + } + else if (op == MULT_EXPR) + { + /* Let x = a + ib = dest, y = c + id = src. + x * y = (ac - bd) + i(ad + bc) */ + tree ac = create_tmp_var (TREE_TYPE (TREE_TYPE (dest))); + tree bd = create_tmp_var (TREE_TYPE (TREE_TYPE (dest))); + tree ad = create_tmp_var (TREE_TYPE (TREE_TYPE (dest))); + tree bc = create_tmp_var (TREE_TYPE (TREE_TYPE (dest))); + + stmt = gimple_build_assign (ac, MULT_EXPR, rdest, rsrc); + gimple_seq_add_stmt (seq, stmt); + + stmt = gimple_build_assign (bd, MULT_EXPR, idest, isrc); + gimple_seq_add_stmt (seq, stmt); + + stmt = gimple_build_assign (r, MINUS_EXPR, ac, bd); + gimple_seq_add_stmt (seq, stmt); + + stmt = gimple_build_assign (ad, MULT_EXPR, rdest, isrc); + gimple_seq_add_stmt (seq, stmt); + + stmt = gimple_build_assign (bc, MULT_EXPR, idest, rsrc); + gimple_seq_add_stmt (seq, stmt); + + stmt = gimple_build_assign (i, PLUS_EXPR, ad, bc); + gimple_seq_add_stmt (seq, stmt); + } + else + gcc_unreachable (); + + result = build2 (COMPLEX_EXPR, TREE_TYPE (dest), r, i); + gimplify_assign (dest, result, seq); +} + +/* Helper function to initialize local data for the reduction arrays. + The reduction arrays need to be placed inside the calling function + for accelerators, or else the host won't be able to perform the final + reduction. */ + +static void +oacc_initialize_reduction_data (tree clauses, tree nthreads, + gimple_seq *stmt_seqp, omp_context *ctx) +{ + tree c, t, oc; + gimple stmt; + omp_context *octx; + + /* Find the innermost OpenACC parallel context. 
*/ + if (gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET + && (gimple_omp_target_kind (ctx->stmt) + == GF_OMP_TARGET_KIND_OACC_PARALLEL)) + octx = ctx; + else + octx = ctx->outer; + gcc_checking_assert (gimple_code (octx->stmt) == GIMPLE_OMP_TARGET + && (gimple_omp_target_kind (octx->stmt) + == GF_OMP_TARGET_KIND_OACC_PARALLEL)); + + /* Extract the clauses. */ + oc = gimple_omp_target_clauses (octx->stmt); + + /* Find the last outer clause. */ + for (; oc && OMP_CLAUSE_CHAIN (oc); oc = OMP_CLAUSE_CHAIN (oc)) + ; + + /* Allocate arrays for each reduction variable. */ + for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + { + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION) + continue; + + tree var = OMP_CLAUSE_DECL (c); + tree type = get_base_type (var); + tree array = lookup_oacc_reduction (oacc_get_reduction_array_id (var), + ctx); + tree size, call; + + /* Calculate size of the reduction array. */ + t = create_tmp_var (TREE_TYPE (nthreads)); + stmt = gimple_build_assign (t, MULT_EXPR, nthreads, + fold_convert (TREE_TYPE (nthreads), + TYPE_SIZE_UNIT (type))); + gimple_seq_add_stmt (stmt_seqp, stmt); + + size = create_tmp_var (sizetype); + gimplify_assign (size, fold_build1 (NOP_EXPR, sizetype, t), stmt_seqp); + + /* Now allocate memory for it. */ + call = unshare_expr (builtin_decl_explicit (BUILT_IN_ALLOCA)); + stmt = gimple_build_call (call, 1, size); + gimple_call_set_lhs (stmt, array); + gimple_seq_add_stmt (stmt_seqp, stmt); + + /* Map this array into the accelerator. */ + + /* Add the reduction array to the list of clauses. */ + tree x = array; + t = build_omp_clause (gimple_location (ctx->stmt), OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (t, GOMP_MAP_FORCE_FROM); + OMP_CLAUSE_DECL (t) = x; + OMP_CLAUSE_CHAIN (t) = NULL; + if (oc) + OMP_CLAUSE_CHAIN (oc) = t; + else + gimple_omp_target_set_clauses (as_a <gomp_target *> (octx->stmt), t); + OMP_CLAUSE_SIZE (t) = size; + oc = t; + } +} + +/* Helper function to process the array of partial reductions.
Nthreads + indicates the number of threads. Unfortunately, GOACC_GET_NUM_THREADS + cannot be used here, because nthreads on the host may be different than + on the accelerator. */ + +static void +oacc_finalize_reduction_data (tree clauses, tree nthreads, + gimple_seq *stmt_seqp, omp_context *ctx) +{ + tree c, x, var, array, loop_header, loop_body, loop_exit, type; + gimple stmt; + + /* Create for loop. + + let var = the original reduction variable + let array = reduction variable array + + for (i = 0; i < nthreads; i++) + var op= array[i] + */ + + loop_header = create_artificial_label (UNKNOWN_LOCATION); + loop_body = create_artificial_label (UNKNOWN_LOCATION); + loop_exit = create_artificial_label (UNKNOWN_LOCATION); + + /* Create and initialize an index variable. */ + tree ix = create_tmp_var (sizetype); + gimplify_assign (ix, fold_build1 (NOP_EXPR, sizetype, integer_zero_node), + stmt_seqp); + + /* Insert the loop header label here. */ + gimple_seq_add_stmt (stmt_seqp, gimple_build_label (loop_header)); + + /* Exit loop if ix >= nthreads. */ + x = create_tmp_var (sizetype); + gimplify_assign (x, fold_build1 (NOP_EXPR, sizetype, nthreads), stmt_seqp); + stmt = gimple_build_cond (GE_EXPR, ix, x, loop_exit, loop_body); + gimple_seq_add_stmt (stmt_seqp, stmt); + + /* Insert the loop body label here. */ + gimple_seq_add_stmt (stmt_seqp, gimple_build_label (loop_body)); + + /* Collapse each reduction array, one element at a time. */ + for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + { + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION) + continue; + + tree_code reduction_code = OMP_CLAUSE_REDUCTION_CODE (c); + + /* reduction(-:var) sums up the partial results, so it acts + identically to reduction(+:var). */ + if (reduction_code == MINUS_EXPR) + reduction_code = PLUS_EXPR; + + /* Set up reduction variable var. 
*/ + var = OMP_CLAUSE_DECL (c); + type = get_base_type (var); + array = lookup_oacc_reduction (oacc_get_reduction_array_id + (OMP_CLAUSE_DECL (c)), ctx); + + /* Calculate the array offset. */ + tree offset = create_tmp_var (sizetype); + gimplify_assign (offset, TYPE_SIZE_UNIT (type), stmt_seqp); + stmt = gimple_build_assign (offset, MULT_EXPR, offset, ix); + gimple_seq_add_stmt (stmt_seqp, stmt); + + tree ptr = create_tmp_var (TREE_TYPE (array)); + stmt = gimple_build_assign (ptr, POINTER_PLUS_EXPR, array, offset); + gimple_seq_add_stmt (stmt_seqp, stmt); + + /* Extract array[ix] into mem. */ + tree mem = create_tmp_var (type); + gimplify_assign (mem, build_simple_mem_ref (ptr), stmt_seqp); + + /* Find the original reduction variable. */ + if (is_reference (var)) + var = build_simple_mem_ref (var); + + tree t = create_tmp_var (type); + + x = lang_hooks.decls.omp_clause_assign_op (c, t, var); + gimplify_and_add (unshare_expr (x), stmt_seqp); + + /* var = var op mem, with op being the normalized reduction_code (MINUS_EXPR was mapped to PLUS_EXPR earlier in this loop). */ + switch (reduction_code) + { + case TRUTH_ANDIF_EXPR: + case TRUTH_ORIF_EXPR: + t = fold_build2 (reduction_code, integer_type_node, + t, mem); + gimplify_and_add (t, stmt_seqp); + break; + default: + /* The lhs isn't a gimple_reg when var is COMPLEX_TYPE. */ + oacc_gimple_assign (t, reduction_code, mem, + stmt_seqp); + } + + t = fold_build1 (NOP_EXPR, TREE_TYPE (var), t); + x = lang_hooks.decls.omp_clause_assign_op (c, var, t); + gimplify_and_add (unshare_expr (x), stmt_seqp); + } + + /* Increment the induction variable. */ + tree one = fold_build1 (NOP_EXPR, sizetype, integer_one_node); + stmt = gimple_build_assign (ix, PLUS_EXPR, ix, one); + gimple_seq_add_stmt (stmt_seqp, stmt); + + /* Go back to the top of the loop. */ + gimple_seq_add_stmt (stmt_seqp, gimple_build_goto (loop_header)); + + /* Place the loop exit label here.
*/ + gimple_seq_add_stmt (stmt_seqp, gimple_build_label (loop_exit)); +} + +/* Scan through all of the gimple stmts searching for an OMP_FOR_EXPR, and + scan that for reductions. */ + +static void +oacc_process_reduction_data (gimple_seq *body, gimple_seq *in_stmt_seqp, + gimple_seq *out_stmt_seqp, omp_context *ctx) +{ + gimple_stmt_iterator gsi; + gimple_seq inner = NULL; + + /* A collapse clause may have inserted a new bind block. */ + gsi = gsi_start (*body); + while (!gsi_end_p (gsi)) + { + gimple stmt = gsi_stmt (gsi); + if (gbind *bind_stmt = dyn_cast <gbind *> (stmt)) + { + inner = gimple_bind_body (bind_stmt); + body = &inner; + gsi = gsi_start (*body); + } + else if (dyn_cast <gomp_for *> (stmt)) + break; + else + gsi_next (&gsi); + } + + for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi)) + { + tree clauses, nthreads, t, c, acc_device, acc_device_host, call, + enter, exit; + bool reduction_found = false; + + gimple stmt = gsi_stmt (gsi); + + switch (gimple_code (stmt)) + { + case GIMPLE_OMP_FOR: + clauses = gimple_omp_for_clauses (stmt); + + /* Search for a reduction clause. */ + for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION) + { + reduction_found = true; + break; + } + + if (!reduction_found) + break; + + ctx = maybe_lookup_ctx (stmt); + t = NULL_TREE; + + /* Extract the number of threads. */ + nthreads = create_tmp_var (sizetype); + t = oacc_max_threads (ctx); + gimplify_assign (nthreads, t, in_stmt_seqp); + + /* Determine if this kernel will be executed on the host. */ + call = builtin_decl_explicit (BUILT_IN_ACC_GET_DEVICE_TYPE); + acc_device = create_tmp_var (integer_type_node, ".acc_device_type"); + stmt = gimple_build_call (call, 0); + gimple_call_set_lhs (stmt, acc_device); + gimple_seq_add_stmt (in_stmt_seqp, stmt); + + /* Set nthreads = 1 for ACC_DEVICE_TYPE=host.
*/ + acc_device_host = create_tmp_var (integer_type_node, + ".acc_device_host"); + gimplify_assign (acc_device_host, + build_int_cst (integer_type_node, + GOMP_DEVICE_HOST), + in_stmt_seqp); + + enter = create_artificial_label (UNKNOWN_LOCATION); + exit = create_artificial_label (UNKNOWN_LOCATION); + + stmt = gimple_build_cond (EQ_EXPR, acc_device, acc_device_host, + enter, exit); + gimple_seq_add_stmt (in_stmt_seqp, stmt); + gimple_seq_add_stmt (in_stmt_seqp, gimple_build_label (enter)); + gimplify_assign (nthreads, fold_build1 (NOP_EXPR, sizetype, + integer_one_node), + in_stmt_seqp); + gimple_seq_add_stmt (in_stmt_seqp, gimple_build_label (exit)); + + /* Also, set nthreads = 1 for ACC_DEVICE_TYPE=host_nonshm. */ + gimplify_assign (acc_device_host, + build_int_cst (integer_type_node, + GOMP_DEVICE_HOST_NONSHM), + in_stmt_seqp); + + enter = create_artificial_label (UNKNOWN_LOCATION); + exit = create_artificial_label (UNKNOWN_LOCATION); + + stmt = gimple_build_cond (EQ_EXPR, acc_device, acc_device_host, + enter, exit); + gimple_seq_add_stmt (in_stmt_seqp, stmt); + gimple_seq_add_stmt (in_stmt_seqp, gimple_build_label (enter)); + gimplify_assign (nthreads, fold_build1 (NOP_EXPR, sizetype, + integer_one_node), + in_stmt_seqp); + gimple_seq_add_stmt (in_stmt_seqp, gimple_build_label (exit)); + + oacc_initialize_reduction_data (clauses, nthreads, in_stmt_seqp, + ctx); + oacc_finalize_reduction_data (clauses, nthreads, out_stmt_seqp, ctx); + break; + default: + // Scan for other directives which support reduction here. + break; + } + } +} /* If ctx is a worksharing context inside of a cancellable parallel region and it isn't nowait, add lhs to its GIMPLE_OMP_RETURN @@ -9389,7 +10424,7 @@ lower_omp_critical (gimple_stmt_iterator *gsi_p, omp_context *ctx) else decl = *n; - /* If '#pragma omp critical' is inside target region or + /* If '#pragma omp critical' is inside offloaded region or inside function marked as offloadable, the symbol must be marked as offloadable too. 
*/ omp_context *octx; @@ -9397,7 +10432,7 @@ lower_omp_critical (gimple_stmt_iterator *gsi_p, omp_context *ctx) varpool_node::get_create (decl)->offloadable = 1; else for (octx = ctx->outer; octx; octx = octx->outer) - if (is_targetreg_ctx (octx)) + if (is_gimple_omp_offloaded (octx->stmt)) { varpool_node::get_create (decl)->offloadable = 1; break; @@ -9499,7 +10534,7 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p, } -/* Lower code for an OpenMP loop directive. */ +/* Lower code for an OMP loop directive. */ static void lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) @@ -10140,7 +11175,7 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) } } -/* Lower the OpenMP target directive in the current statement +/* Lower the GIMPLE_OMP_TARGET in the current statement in GSI_P. CTX holds context information for the directive. */ static void @@ -10149,24 +11184,52 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) tree clauses; tree child_fn, t, c; gomp_target *stmt = as_a (gsi_stmt (*gsi_p)); - gbind *tgt_bind = NULL, *bind; - gimple_seq tgt_body = NULL, olist, ilist, new_body; + gbind *tgt_bind, *bind; + gimple_seq tgt_body, olist, ilist, orlist, irlist, new_body; location_t loc = gimple_location (stmt); - int kind = gimple_omp_target_kind (stmt); + bool offloaded, data_region; unsigned int map_cnt = 0; + offloaded = is_gimple_omp_offloaded (stmt); + switch (gimple_omp_target_kind (stmt)) + { + case GF_OMP_TARGET_KIND_REGION: + case GF_OMP_TARGET_KIND_UPDATE: + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + case GF_OMP_TARGET_KIND_OACC_KERNELS: + case GF_OMP_TARGET_KIND_OACC_UPDATE: + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + data_region = false; + break; + case GF_OMP_TARGET_KIND_DATA: + case GF_OMP_TARGET_KIND_OACC_DATA: + data_region = true; + break; + default: + gcc_unreachable (); + } + clauses = gimple_omp_target_clauses (stmt); - if (kind == GF_OMP_TARGET_KIND_REGION) + + tgt_bind = NULL; + 
tgt_body = NULL; + if (offloaded) { tgt_bind = gimple_seq_first_stmt_as_a_bind (gimple_omp_body (stmt)); tgt_body = gimple_bind_body (tgt_bind); } - else if (kind == GF_OMP_TARGET_KIND_DATA) + else if (data_region) tgt_body = gimple_omp_body (stmt); child_fn = ctx->cb.dst_fn; push_gimplify_context (); + irlist = NULL; + orlist = NULL; + if (offloaded + && is_gimple_omp_oacc (stmt)) + oacc_process_reduction_data (&tgt_body, &irlist, &orlist, ctx); + for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c)) switch (OMP_CLAUSE_CODE (c)) { @@ -10175,6 +11238,31 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) default: break; case OMP_CLAUSE_MAP: +#ifdef ENABLE_CHECKING + /* First check what we're prepared to handle in the following. */ + switch (OMP_CLAUSE_MAP_KIND (c)) + { + case GOMP_MAP_ALLOC: + case GOMP_MAP_TO: + case GOMP_MAP_FROM: + case GOMP_MAP_TOFROM: + case GOMP_MAP_POINTER: + case GOMP_MAP_TO_PSET: + break; + case GOMP_MAP_FORCE_ALLOC: + case GOMP_MAP_FORCE_TO: + case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_FORCE_TOFROM: + case GOMP_MAP_FORCE_PRESENT: + case GOMP_MAP_FORCE_DEALLOC: + case GOMP_MAP_FORCE_DEVICEPTR: + gcc_assert (is_gimple_omp_oacc (stmt)); + break; + default: + gcc_unreachable (); + } +#endif + /* FALLTHRU */ case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: var = OMP_CLAUSE_DECL (c); @@ -10199,12 +11287,11 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (!maybe_lookup_field (var, ctx)) continue; - if (kind == GF_OMP_TARGET_KIND_REGION) + if (offloaded) { x = build_receiver_ref (var, true, ctx); tree new_var = lookup_decl (var, ctx); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER + if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) && TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE) x = build_simple_mem_ref (x); @@ -10214,16 +11301,16 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) map_cnt++; } - if (kind == 
GF_OMP_TARGET_KIND_REGION) + if (offloaded) { target_nesting_level++; lower_omp (&tgt_body, ctx); target_nesting_level--; } - else if (kind == GF_OMP_TARGET_KIND_DATA) + else if (data_region) lower_omp (&tgt_body, ctx); - if (kind == GF_OMP_TARGET_KIND_REGION) + if (offloaded) { /* Declare all the variables created by mapping and the variables declared in the scope of the target body. */ @@ -10247,9 +11334,20 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) DECL_NAMELESS (TREE_VEC_ELT (t, 1)) = 1; TREE_ADDRESSABLE (TREE_VEC_ELT (t, 1)) = 1; TREE_STATIC (TREE_VEC_ELT (t, 1)) = 1; + tree tkind_type; + int talign_shift; + if (is_gimple_omp_oacc (stmt)) + { + tkind_type = short_unsigned_type_node; + talign_shift = 8; + } + else + { + tkind_type = unsigned_char_type_node; + talign_shift = 3; + } TREE_VEC_ELT (t, 2) - = create_tmp_var (build_array_type_nelts (unsigned_char_type_node, - map_cnt), + = create_tmp_var (build_array_type_nelts (tkind_type, map_cnt), ".omp_data_kinds"); DECL_NAMELESS (TREE_VEC_ELT (t, 2)) = 1; TREE_ADDRESSABLE (TREE_VEC_ELT (t, 2)) = 1; @@ -10315,12 +11413,18 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) { tree var = lookup_decl_in_outer_ctx (ovar, ctx); tree x = build_sender_ref (ovar, ctx); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER - && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) - && TREE_CODE (TREE_TYPE (ovar)) == ARRAY_TYPE) + if (maybe_lookup_oacc_reduction (var, ctx)) { - gcc_assert (kind == GF_OMP_TARGET_KIND_REGION); + gcc_checking_assert (offloaded + && is_gimple_omp_oacc (stmt)); + gimplify_assign (x, var, &ilist); + } + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER + && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) + && TREE_CODE (TREE_TYPE (ovar)) == ARRAY_TYPE) + { + gcc_assert (offloaded); tree avar = create_tmp_var (TREE_TYPE (TREE_TYPE (x))); mark_addressable (avar); @@ -10331,16 
+11435,19 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } else if (is_gimple_reg (var)) { - gcc_assert (kind == GF_OMP_TARGET_KIND_REGION); + gcc_assert (offloaded); tree avar = create_tmp_var (TREE_TYPE (var)); mark_addressable (avar); - if (OMP_CLAUSE_MAP_KIND (c) != OMP_CLAUSE_MAP_ALLOC - && OMP_CLAUSE_MAP_KIND (c) != OMP_CLAUSE_MAP_FROM) + enum gomp_map_kind map_kind = OMP_CLAUSE_MAP_KIND (c); + if (GOMP_MAP_COPY_TO_P (map_kind) + || map_kind == GOMP_MAP_POINTER + || map_kind == GOMP_MAP_TO_PSET + || map_kind == GOMP_MAP_FORCE_DEVICEPTR) gimplify_assign (avar, var, &ilist); avar = build_fold_addr_expr (avar); gimplify_assign (x, avar, &ilist); - if ((OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_FROM - || OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_TOFROM) + if ((GOMP_MAP_COPY_FROM_P (map_kind) + || map_kind == GOMP_MAP_FORCE_DEVICEPTR) && !TYPE_READONLY (TREE_TYPE (var))) { x = build_sender_ref (ovar, ctx); @@ -10363,26 +11470,29 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (TREE_CODE (s) != INTEGER_CST) TREE_STATIC (TREE_VEC_ELT (t, 1)) = 0; - unsigned char tkind = 0; + unsigned HOST_WIDE_INT tkind; switch (OMP_CLAUSE_CODE (c)) { case OMP_CLAUSE_MAP: tkind = OMP_CLAUSE_MAP_KIND (c); break; case OMP_CLAUSE_TO: - tkind = OMP_CLAUSE_MAP_TO; + tkind = GOMP_MAP_TO; break; case OMP_CLAUSE_FROM: - tkind = OMP_CLAUSE_MAP_FROM; + tkind = GOMP_MAP_FROM; break; default: gcc_unreachable (); } + gcc_checking_assert (tkind + < (HOST_WIDE_INT_C (1U) << talign_shift)); talign = ceil_log2 (talign); - tkind |= talign << 3; + tkind |= talign << talign_shift; + gcc_checking_assert (tkind + <= tree_to_uhwi (TYPE_MAX_VALUE (tkind_type))); CONSTRUCTOR_APPEND_ELT (vkind, purpose, - build_int_cst (unsigned_char_type_node, - tkind)); + build_int_cstu (tkind_type, tkind)); if (nc && nc != c) c = nc; } @@ -10420,7 +11530,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) new_body = NULL; - if (ctx->record_type && kind == 
GF_OMP_TARGET_KIND_REGION) + if (offloaded + && ctx->record_type) { t = build_fold_addr_expr_loc (loc, ctx->sender_decl); /* fixup_child_record_type might have changed receiver_decl's type. */ @@ -10429,14 +11540,14 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_build_assign (ctx->receiver_decl, t)); } - if (kind == GF_OMP_TARGET_KIND_REGION) + if (offloaded) { gimple_seq_add_seq (&new_body, tgt_body); new_body = maybe_catch_exception (new_body); } - else if (kind == GF_OMP_TARGET_KIND_DATA) + else if (data_region) new_body = tgt_body; - if (kind != GF_OMP_TARGET_KIND_UPDATE) + if (offloaded || data_region) { gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false)); gimple_omp_set_body (stmt, new_body); @@ -10446,9 +11557,11 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) tgt_bind ? gimple_bind_block (tgt_bind) : NULL_TREE); gsi_replace (gsi_p, bind, true); + gimple_bind_add_seq (bind, irlist); gimple_bind_add_seq (bind, ilist); gimple_bind_add_stmt (bind, stmt); gimple_bind_add_seq (bind, olist); + gimple_bind_add_seq (bind, orlist); pop_gimplify_context (NULL); } @@ -10520,7 +11633,7 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx) /* Callback for lower_omp_1. Return non-NULL if *tp needs to be regimplified. If DATA is non-NULL, lower_omp_1 is outside - of OpenMP context, but with task_shared_vars set. */ + of OMP context, but with task_shared_vars set. */ static tree lower_omp_regimplify_p (tree *tp, int *walk_subtrees, @@ -10560,7 +11673,7 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx) memset (&wi, '\0', sizeof (wi)); /* If we have issued syntax errors, avoid doing any heavy lifting. - Just replace the OpenMP directives with a NOP to avoid + Just replace the OMP directives with a NOP to avoid confusing RTL expansion. 
*/ if (seen_error () && is_gimple_omp (stmt)) { @@ -10747,8 +11860,8 @@ lower_omp (gimple_seq *body, omp_context *ctx) gimple_stmt_iterator gsi; for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi)) lower_omp_1 (&gsi, ctx); - /* During gimplification, we have not always invoked fold_stmt - (gimplify.c:maybe_fold_stmt); call it now. */ + /* During gimplification, we haven't folded statements inside offloading + regions (gimplify.c:maybe_fold_stmt); do that now. */ if (target_nesting_level) for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi)) fold_stmt (&gsi); @@ -10765,8 +11878,9 @@ execute_lower_omp (void) omp_context *ctx; /* This pass always runs, to provide PROP_gimple_lomp. - But there is nothing to do unless -fopenmp is given. */ - if (flag_openmp == 0 && flag_openmp_simd == 0 && flag_cilkplus == 0) + But often, there is nothing to do. */ + if (flag_cilkplus == 0 && flag_openacc == 0 && flag_openmp == 0 + && flag_openmp_simd == 0) return 0; all_contexts = splay_tree_new (splay_tree_compare_pointers, 0, @@ -10832,7 +11946,7 @@ make_pass_lower_omp (gcc::context *ctxt) return new pass_lower_omp (ctxt); } -/* The following is a utility to diagnose OpenMP structured block violations. +/* The following is a utility to diagnose structured block violations. It is not part of the "omplower" pass, as that's invoked too late. It should be invoked by the respective front ends after gimplification.
*/ @@ -10845,9 +11959,38 @@ static bool diagnose_sb_0 (gimple_stmt_iterator *gsi_p, gimple branch_ctx, gimple label_ctx) { + gcc_checking_assert (!branch_ctx || is_gimple_omp (branch_ctx)); + gcc_checking_assert (!label_ctx || is_gimple_omp (label_ctx)); + if (label_ctx == branch_ctx) return false; + const char* kind = NULL; + + if (flag_cilkplus) + { + if ((branch_ctx + && gimple_code (branch_ctx) == GIMPLE_OMP_FOR + && gimple_omp_for_kind (branch_ctx) == GF_OMP_FOR_KIND_CILKSIMD) + || (label_ctx + && gimple_code (label_ctx) == GIMPLE_OMP_FOR + && gimple_omp_for_kind (label_ctx) == GF_OMP_FOR_KIND_CILKSIMD)) + kind = "Cilk Plus"; + } + if (flag_openacc) + { + if ((branch_ctx && is_gimple_omp_oacc (branch_ctx)) + || (label_ctx && is_gimple_omp_oacc (label_ctx))) + { + gcc_checking_assert (kind == NULL); + kind = "OpenACC"; + } + } + if (kind == NULL) + { + gcc_checking_assert (flag_openmp); + kind = "OpenMP"; + } /* Previously we kept track of the label's entire context in diagnose_sb_[12] @@ -10880,45 +12023,25 @@ diagnose_sb_0 (gimple_stmt_iterator *gsi_p, } if (exit_p) - error ("invalid exit from OpenMP structured block"); + error ("invalid exit from %s structured block", kind); else - error ("invalid entry to OpenMP structured block"); + error ("invalid entry to %s structured block", kind); #endif - bool cilkplus_block = false; - if (flag_cilkplus) - { - if ((branch_ctx - && gimple_code (branch_ctx) == GIMPLE_OMP_FOR - && gimple_omp_for_kind (branch_ctx) == GF_OMP_FOR_KIND_CILKSIMD) - || (label_ctx - && gimple_code (label_ctx) == GIMPLE_OMP_FOR - && gimple_omp_for_kind (label_ctx) == GF_OMP_FOR_KIND_CILKSIMD)) - cilkplus_block = true; - } - /* If it's obvious we have an invalid entry, be specific about the error. 
*/ if (branch_ctx == NULL) - { - if (cilkplus_block) - error ("invalid entry to Cilk Plus structured block"); - else - error ("invalid entry to OpenMP structured block"); - } + error ("invalid entry to %s structured block", kind); else { /* Otherwise, be vague and lazy, but efficient. */ - if (cilkplus_block) - error ("invalid branch to/from a Cilk Plus structured block"); - else - error ("invalid branch to/from an OpenMP structured block"); + error ("invalid branch to/from %s structured block", kind); } gsi_replace (gsi_p, gimple_build_nop (), false); return true; } -/* Pass 1: Create a minimal tree of OpenMP structured blocks, and record +/* Pass 1: Create a minimal tree of structured blocks, and record where each label is found. */ static tree @@ -10931,7 +12054,7 @@ diagnose_sb_1 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p, *handled_ops_p = true; - switch (gimple_code (stmt)) + switch (gimple_code (stmt)) { WALK_SUBSTMTS; @@ -11079,8 +12202,8 @@ diagnose_sb_2 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p, return NULL_TREE; } -/* Called from tree-cfg.c::make_edges to create cfg edges for all GIMPLE_OMP - codes. */ +/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant + GIMPLE_* codes. 
*/ bool make_gimple_omp_edges (basic_block bb, struct omp_region **region, int *region_idx) @@ -11109,8 +12232,22 @@ make_gimple_omp_edges (basic_block bb, struct omp_region **region, case GIMPLE_OMP_TARGET: cur_region = new_omp_region (bb, code, cur_region); fallthru = true; - if (gimple_omp_target_kind (last) == GF_OMP_TARGET_KIND_UPDATE) - cur_region = cur_region->outer; + switch (gimple_omp_target_kind (last)) + { + case GF_OMP_TARGET_KIND_REGION: + case GF_OMP_TARGET_KIND_DATA: + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + case GF_OMP_TARGET_KIND_OACC_KERNELS: + case GF_OMP_TARGET_KIND_OACC_DATA: + break; + case GF_OMP_TARGET_KIND_UPDATE: + case GF_OMP_TARGET_KIND_OACC_UPDATE: + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + cur_region = cur_region->outer; + break; + default: + gcc_unreachable (); + } break; case GIMPLE_OMP_SECTIONS: @@ -11247,7 +12384,10 @@ public: {} /* opt_pass methods: */ - virtual bool gate (function *) { return flag_openmp || flag_cilkplus; } + virtual bool gate (function *) + { + return flag_cilkplus || flag_openacc || flag_openmp; + } virtual unsigned int execute (function *) { return diagnose_omp_structured_block_errors (); @@ -12515,7 +13655,7 @@ add_decls_addresses_to_decl_constructor (vec *v_decls, /* Create new symbols containing (address, size) pairs for global variables, marked with "omp declare target" attribute, as well as addresses for the - functions, which are outlined target regions. */ + functions, which are outlined offloading regions. */ void omp_finish_file (void) { diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d9525d59650..885a7101737 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,133 @@ +2015-01-15 Thomas Schwinge + James Norris + Cesar Philippidis + Ilmir Usmanov + + * lib/target-supports.exp (check_effective_target_fopenacc): New + procedure. + * g++.dg/goacc-gomp/goacc-gomp.exp: New file. + * g++.dg/goacc/goacc.exp: Likewise. 
+ * gcc.dg/goacc-gomp/goacc-gomp.exp: Likewise. + * gcc.dg/goacc/goacc.exp: Likewise. + * gfortran.dg/goacc/goacc.exp: Likewise. + * c-c++-common/cpp/openacc-define-1.c: New file. + * c-c++-common/cpp/openacc-define-2.c: Likewise. + * c-c++-common/cpp/openacc-define-3.c: Likewise. + * c-c++-common/goacc-gomp/nesting-1.c: Likewise. + * c-c++-common/goacc-gomp/nesting-fail-1.c: Likewise. + * c-c++-common/goacc/acc_on_device-2-off.c: Likewise. + * c-c++-common/goacc/acc_on_device-2.c: Likewise. + * c-c++-common/goacc/asyncwait-1.c: Likewise. + * c-c++-common/goacc/cache-1.c: Likewise. + * c-c++-common/goacc/clauses-fail.c: Likewise. + * c-c++-common/goacc/collapse-1.c: Likewise. + * c-c++-common/goacc/data-1.c: Likewise. + * c-c++-common/goacc/data-2.c: Likewise. + * c-c++-common/goacc/data-clause-duplicate-1.c: Likewise. + * c-c++-common/goacc/deviceptr-1.c: Likewise. + * c-c++-common/goacc/deviceptr-2.c: Likewise. + * c-c++-common/goacc/deviceptr-3.c: Likewise. + * c-c++-common/goacc/if-clause-1.c: Likewise. + * c-c++-common/goacc/if-clause-2.c: Likewise. + * c-c++-common/goacc/kernels-1.c: Likewise. + * c-c++-common/goacc/loop-1.c: Likewise. + * c-c++-common/goacc/loop-private-1.c: Likewise. + * c-c++-common/goacc/nesting-1.c: Likewise. + * c-c++-common/goacc/nesting-data-1.c: Likewise. + * c-c++-common/goacc/nesting-fail-1.c: Likewise. + * c-c++-common/goacc/parallel-1.c: Likewise. + * c-c++-common/goacc/pcopy.c: Likewise. + * c-c++-common/goacc/pcopyin.c: Likewise. + * c-c++-common/goacc/pcopyout.c: Likewise. + * c-c++-common/goacc/pcreate.c: Likewise. + * c-c++-common/goacc/pragma_context.c: Likewise. + * c-c++-common/goacc/present-1.c: Likewise. + * c-c++-common/goacc/reduction-1.c: Likewise. + * c-c++-common/goacc/reduction-2.c: Likewise. + * c-c++-common/goacc/reduction-3.c: Likewise. + * c-c++-common/goacc/reduction-4.c: Likewise. + * c-c++-common/goacc/sb-1.c: Likewise. + * c-c++-common/goacc/sb-2.c: Likewise. + * c-c++-common/goacc/sb-3.c: Likewise. 
+ * c-c++-common/goacc/update-1.c: Likewise. + * gcc.dg/goacc/acc_on_device-1.c: Likewise. + * gfortran.dg/goacc/acc_on_device-1.f95: Likewise. + * gfortran.dg/goacc/acc_on_device-2-off.f95: Likewise. + * gfortran.dg/goacc/acc_on_device-2.f95: Likewise. + * gfortran.dg/goacc/assumed.f95: Likewise. + * gfortran.dg/goacc/asyncwait-1.f95: Likewise. + * gfortran.dg/goacc/asyncwait-2.f95: Likewise. + * gfortran.dg/goacc/asyncwait-3.f95: Likewise. + * gfortran.dg/goacc/asyncwait-4.f95: Likewise. + * gfortran.dg/goacc/branch.f95: Likewise. + * gfortran.dg/goacc/cache-1.f95: Likewise. + * gfortran.dg/goacc/coarray.f95: Likewise. + * gfortran.dg/goacc/continuation-free-form.f95: Likewise. + * gfortran.dg/goacc/cray.f95: Likewise. + * gfortran.dg/goacc/critical.f95: Likewise. + * gfortran.dg/goacc/data-clauses.f95: Likewise. + * gfortran.dg/goacc/data-tree.f95: Likewise. + * gfortran.dg/goacc/declare-1.f95: Likewise. + * gfortran.dg/goacc/enter-exit-data.f95: Likewise. + * gfortran.dg/goacc/fixed-1.f: Likewise. + * gfortran.dg/goacc/fixed-2.f: Likewise. + * gfortran.dg/goacc/fixed-3.f: Likewise. + * gfortran.dg/goacc/fixed-4.f: Likewise. + * gfortran.dg/goacc/host_data-tree.f95: Likewise. + * gfortran.dg/goacc/if.f95: Likewise. + * gfortran.dg/goacc/kernels-tree.f95: Likewise. + * gfortran.dg/goacc/list.f95: Likewise. + * gfortran.dg/goacc/literal.f95: Likewise. + * gfortran.dg/goacc/loop-1.f95: Likewise. + * gfortran.dg/goacc/loop-2.f95: Likewise. + * gfortran.dg/goacc/loop-3.f95: Likewise. + * gfortran.dg/goacc/loop-tree-1.f90: Likewise. + * gfortran.dg/goacc/omp.f95: Likewise. + * gfortran.dg/goacc/parallel-kernels-clauses.f95: Likewise. + * gfortran.dg/goacc/parallel-kernels-regions.f95: Likewise. + * gfortran.dg/goacc/parallel-tree.f95: Likewise. + * gfortran.dg/goacc/parameter.f95: Likewise. + * gfortran.dg/goacc/private-1.f95: Likewise. + * gfortran.dg/goacc/private-2.f95: Likewise. + * gfortran.dg/goacc/private-3.f95: Likewise. 
+ * gfortran.dg/goacc/pure-elemental-procedures.f95: Likewise. + * gfortran.dg/goacc/reduction-2.f95: Likewise. + * gfortran.dg/goacc/reduction.f95: Likewise. + * gfortran.dg/goacc/routine-1.f90: Likewise. + * gfortran.dg/goacc/routine-2.f90: Likewise. + * gfortran.dg/goacc/sentinel-free-form.f95: Likewise. + * gfortran.dg/goacc/several-directives.f95: Likewise. + * gfortran.dg/goacc/sie.f95: Likewise. + * gfortran.dg/goacc/subarrays.f95: Likewise. + * gfortran.dg/gomp/map-1.f90: Likewise. + * gfortran.dg/openacc-define-1.f90: Likewise. + * gfortran.dg/openacc-define-2.f90: Likewise. + * gfortran.dg/openacc-define-3.f90: Likewise. + * g++.dg/gomp/block-1.C: Update for changed compiler output. + * g++.dg/gomp/block-2.C: Likewise. + * g++.dg/gomp/block-3.C: Likewise. + * g++.dg/gomp/block-5.C: Likewise. + * g++.dg/gomp/target-1.C: Likewise. + * g++.dg/gomp/target-2.C: Likewise. + * g++.dg/gomp/taskgroup-1.C: Likewise. + * g++.dg/gomp/teams-1.C: Likewise. + * gcc.dg/cilk-plus/jump-openmp.c: Likewise. + * gcc.dg/cilk-plus/jump.c: Likewise. + * gcc.dg/gomp/block-1.c: Likewise. + * gcc.dg/gomp/block-10.c: Likewise. + * gcc.dg/gomp/block-2.c: Likewise. + * gcc.dg/gomp/block-3.c: Likewise. + * gcc.dg/gomp/block-4.c: Likewise. + * gcc.dg/gomp/block-5.c: Likewise. + * gcc.dg/gomp/block-6.c: Likewise. + * gcc.dg/gomp/block-7.c: Likewise. + * gcc.dg/gomp/block-8.c: Likewise. + * gcc.dg/gomp/block-9.c: Likewise. + * gcc.dg/gomp/target-1.c: Likewise. + * gcc.dg/gomp/target-2.c: Likewise. + * gcc.dg/gomp/taskgroup-1.c: Likewise. + * gcc.dg/gomp/teams-1.c: Likewise. 
+ 2015-01-15 David Malcolm * jit.dg/test-error-mismatching-types-in-assignment-op.c: New diff --git a/gcc/testsuite/c-c++-common/cpp/openacc-define-1.c b/gcc/testsuite/c-c++-common/cpp/openacc-define-1.c new file mode 100644 index 00000000000..cd37548daff --- /dev/null +++ b/gcc/testsuite/c-c++-common/cpp/openacc-define-1.c @@ -0,0 +1,6 @@ +/* { dg-do preprocess } */ +/* { dg-require-effective-target fopenacc } */ + +#ifdef _OPENACC +# error _OPENACC defined +#endif diff --git a/gcc/testsuite/c-c++-common/cpp/openacc-define-2.c b/gcc/testsuite/c-c++-common/cpp/openacc-define-2.c new file mode 100644 index 00000000000..b007e32bfc2 --- /dev/null +++ b/gcc/testsuite/c-c++-common/cpp/openacc-define-2.c @@ -0,0 +1,7 @@ +/* { dg-options "-fno-openacc" } */ +/* { dg-do preprocess } */ +/* { dg-require-effective-target fopenacc } */ + +#ifdef _OPENACC +# error _OPENACC defined +#endif diff --git a/gcc/testsuite/c-c++-common/cpp/openacc-define-3.c b/gcc/testsuite/c-c++-common/cpp/openacc-define-3.c new file mode 100644 index 00000000000..ccedcd90782 --- /dev/null +++ b/gcc/testsuite/c-c++-common/cpp/openacc-define-3.c @@ -0,0 +1,11 @@ +/* { dg-options "-fopenacc" } */ +/* { dg-do preprocess } */ +/* { dg-require-effective-target fopenacc } */ + +#ifndef _OPENACC +# error _OPENACC not defined +#endif + +#if _OPENACC != 201306 +# error _OPENACC defined to wrong value +#endif diff --git a/gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c b/gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c new file mode 100644 index 00000000000..df45bcf05fc --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc-gomp/nesting-1.c @@ -0,0 +1,12 @@ +void +f_omp_parallel (void) +{ +#pragma omp parallel + { + int i; + +#pragma acc loop + for (i = 0; i < 2; ++i) + ; + } +} diff --git a/gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c b/gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c new file mode 100644 index 00000000000..411fb5f8755 --- /dev/null +++ 
b/gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c @@ -0,0 +1,457 @@ +extern int i; + +void +f_omp (void) +{ +#pragma omp parallel + { +#pragma acc parallel /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc kernels /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc data /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc update host(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc enter data copyin(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc exit data delete(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + } + +#pragma omp for + for (i = 0; i < 3; i++) + { +#pragma acc parallel /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc kernels /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc data /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc update host(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc enter data copyin(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc exit data delete(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc loop /* { dg-error "may not be closely nested" } */ + for (i = 0; i < 2; ++i) + ; + } + +#pragma omp sections + { + { +#pragma acc parallel /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; + } +#pragma omp section + { +#pragma acc kernels /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; + } +#pragma omp section + { +#pragma acc data /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; + } +#pragma omp section + { +#pragma acc update host(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + } +#pragma omp section + { +#pragma acc enter data copyin(i) 
/* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + } +#pragma omp section + { +#pragma acc exit data delete(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + } +#pragma omp section + { +#pragma acc loop /* { dg-error "may not be closely nested" } */ + for (i = 0; i < 2; ++i) + ; + } + } + +#pragma omp single + { +#pragma acc parallel /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc kernels /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc data /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc update host(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc enter data copyin(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc exit data delete(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc loop /* { dg-error "may not be closely nested" } */ + for (i = 0; i < 2; ++i) + ; + } + +#pragma omp task + { +#pragma acc parallel /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc kernels /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc data /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc update host(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc enter data copyin(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc exit data delete(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc loop /* { dg-error "may not be closely nested" } */ + for (i = 0; i < 2; ++i) + ; + } + +#pragma omp master + { +#pragma acc parallel /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc kernels /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc data /* { dg-error "OpenACC construct 
inside of non-OpenACC region" } */ + ; +#pragma acc update host(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc enter data copyin(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc exit data delete(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc loop /* { dg-error "may not be closely nested" } */ + for (i = 0; i < 2; ++i) + ; + } + +#pragma omp critical + { +#pragma acc parallel /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc kernels /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc data /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc update host(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc enter data copyin(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc exit data delete(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc loop /* { dg-error "may not be closely nested" } */ + for (i = 0; i < 2; ++i) + ; + } + +#pragma omp ordered + { +#pragma acc parallel /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc kernels /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc data /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ + ; +#pragma acc update host(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc enter data copyin(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc exit data delete(i) /* { dg-error "OpenACC construct inside of non-OpenACC region" } */ +#pragma acc loop /* { dg-error "may not be closely nested" } */ + for (i = 0; i < 2; ++i) + ; + } + +#pragma omp target + { +#pragma acc parallel /* { dg-error "OpenACC parallel construct inside of OpenMP target region" } */ + ; +#pragma acc kernels /* { 
dg-error "OpenACC kernels construct inside of OpenMP target region" } */ + ; +#pragma acc data /* { dg-error "OpenACC data construct inside of OpenMP target region" } */ + ; +#pragma acc update host(i) /* { dg-error "OpenACC update construct inside of OpenMP target region" } */ +#pragma acc enter data copyin(i) /* { dg-error "OpenACC enter/exit data construct inside of OpenMP target region" } */ +#pragma acc exit data delete(i) /* { dg-error "OpenACC enter/exit data construct inside of OpenMP target region" } */ +#pragma acc loop + for (i = 0; i < 2; ++i) + ; + } +} + +void +f_acc_parallel (void) +{ +#pragma acc parallel + { +#pragma omp parallel /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc parallel + { +#pragma omp for /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + for (i = 0; i < 3; i++) + ; + } + +#pragma acc parallel + { +#pragma omp sections /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + { + ; + } + } + +#pragma acc parallel + { +#pragma omp single /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc parallel + { +#pragma omp task /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc parallel + { +#pragma omp master /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc parallel + { +#pragma omp critical /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc parallel + { +#pragma omp atomic write + i = 0; /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + } + +#pragma acc parallel + { +#pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc parallel + { +#pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; +#pragma omp target data /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; +#pragma omp 
target update to(i) /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + } +} + +void +f_acc_kernels (void) +{ +#pragma acc kernels + { +#pragma omp parallel /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc kernels + { +#pragma omp for /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + for (i = 0; i < 3; i++) + ; + } + +#pragma acc kernels + { +#pragma omp sections /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + { + ; + } + } + +#pragma acc kernels + { +#pragma omp single /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc kernels + { +#pragma omp task /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc kernels + { +#pragma omp master /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc kernels + { +#pragma omp critical /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc kernels + { +#pragma omp atomic write + i = 0; /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + } + +#pragma acc kernels + { +#pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc kernels + { +#pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; +#pragma omp target data /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; +#pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + } +} + +void +f_acc_data (void) +{ +#pragma acc data + { +#pragma omp parallel /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc data + { +#pragma omp for /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + for (i = 0; i < 3; i++) + ; + } + +#pragma acc data + { +#pragma omp sections /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */ + { + ; + } + } + +#pragma acc data + { +#pragma omp single /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc data + { +#pragma omp task /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc data + { +#pragma omp master /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc data + { +#pragma omp critical /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc data + { +#pragma omp atomic write + i = 0; /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + } + +#pragma acc data + { +#pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc data + { +#pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; +#pragma omp target data /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; +#pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + } +} + +void +f_acc_loop (void) +{ +#pragma acc loop + for (i = 0; i < 2; ++i) + { +#pragma omp parallel /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc loop + for (i = 0; i < 2; ++i) + { +#pragma omp for /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + for (i = 0; i < 3; i++) + ; + } + +#pragma acc loop + for (i = 0; i < 2; ++i) + { +#pragma omp sections /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + { + ; + } + } + +#pragma acc loop + for (i = 0; i < 2; ++i) + { +#pragma omp single /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc loop + for (i = 0; i < 2; ++i) + { +#pragma omp task /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc loop + for (i = 0; i < 2; ++i) + { +#pragma omp master /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */ + ; + } + +#pragma acc loop + for (i = 0; i < 2; ++i) + { +#pragma omp critical /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc loop + for (i = 0; i < 2; ++i) + { +#pragma omp atomic write + i = 0; /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + } + +#pragma acc loop + for (i = 0; i < 2; ++i) + { +#pragma omp ordered /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; + } + +#pragma acc loop + for (i = 0; i < 2; ++i) + { +#pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; +#pragma omp target data /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + ; +#pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of OpenACC region" } */ + } +} diff --git a/gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c b/gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c new file mode 100644 index 00000000000..25d21ad29c9 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c @@ -0,0 +1,25 @@ +/* Have to enable optimizations, as otherwise builtins won't be expanded. */ +/* { dg-additional-options "-O -fdump-rtl-expand -fno-openacc" } */ + +#if __cplusplus +extern "C" { +#endif + +typedef enum acc_device_t { acc_device_X = 123 } acc_device_t; +extern int acc_on_device (acc_device_t); + +#if __cplusplus +} +#endif + +int +f (void) +{ + const acc_device_t dev = acc_device_X; + return acc_on_device (dev); +} + +/* Without -fopenacc, we're expecting one call. 
+ { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]*\\\"acc_on_device" 1 "expand" } } */ + +/* { dg-final { cleanup-rtl-dump "expand" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c b/gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c new file mode 100644 index 00000000000..d5389a99072 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c @@ -0,0 +1,29 @@ +/* Have to enable optimizations, as otherwise builtins won't be expanded. */ +/* { dg-additional-options "-O -fdump-rtl-expand" } */ + +#if __cplusplus +extern "C" { +#endif + +typedef enum acc_device_t { acc_device_X = 123 } acc_device_t; +extern int acc_on_device (acc_device_t); + +#if __cplusplus +} +#endif + +int +f (void) +{ + const acc_device_t dev = acc_device_X; + return acc_on_device (dev); +} + +/* With -fopenacc, we're expecting the builtin to be expanded, so no calls. + TODO: in C++, even under extern "C", the use of enum for acc_device_t + perturbs expansion as a builtin, which expects an int parameter. It's fine + when changing acc_device_t to plain int, but that's not what we're doing in + <openacc.h>. 
+ { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]*\\\"acc_on_device" 0 "expand" { xfail c++ } } } */ + +/* { dg-final { cleanup-rtl-dump "expand" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/asyncwait-1.c b/gcc/testsuite/c-c++-common/goacc/asyncwait-1.c new file mode 100644 index 00000000000..ccc0106832a --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/asyncwait-1.c @@ -0,0 +1,213 @@ +void +f (int N, float *a, float *b) +{ + int ii; + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1 2) /* { dg-error "expected '\\)' before numeric constant" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,) /* { dg-error "expected (primary-|)expression before" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (,1) /* { dg-error "expected (primary-|)expression before" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,2,) /* { dg-error "expected (primary-|)expression before" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,2 3) /* { dg-error "expected '\\)' before numeric constant" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,2,,) /* { dg-error "expected (primary-|)expression before" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1 /* { dg-error "expected '\\)' before end of line" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (*) /* { dg-error "expected (primary-|)expression before" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (a) + /* { dg-error "expected integer expression before" 
"" { target c } 54 } */ + /* { dg-error "'async' expression must be integral" "" { target c++ } 54 } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1.0) + /* { dg-error "expected integer expression before" "" { target c } 62 } */ + /* { dg-error "'async' expression must be integral" "" { target c++ } 62 } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async () /* { dg-error "expected (primary-|)expression before" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1 2) /* { dg-error "expected '\\)' before numeric constant" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,) /* { dg-error "expected (primary-|)expression before" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (,1) /* { dg-error "expected (primary-|)expression before" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,2,) /* { dg-error "expected (primary-|)expression before" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,2 3) /* { dg-error "expected '\\)' before numeric constant" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,2,,) /* { dg-error "expected (primary-|)expression before" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1 /* { dg-error "expected '\\\)' before end of line" } */ + /* { dg-error "expected integer expression before '\\\)'" "" { target c++ } 118 } 
*/ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,*) /* { dg-error "expected (primary-|)expression before" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,a) /* { dg-error "must be integral" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (a) /* { dg-error "must be integral" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1.0) /* { dg-error "must be integral" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait () /* { dg-error "expected (integer |)expression (list |)before" } */ + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait + { + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc wait (1 2) /* { dg-error "expected '\\)' before numeric constant" } */ + +#pragma acc wait (1,) /* { dg-error "expected (primary-|)expression before" } */ + +#pragma acc wait (,1) /* { dg-error "expected (primary-|)expression before" } */ + +#pragma acc wait (1,2,) /* { dg-error "expected (primary-|)expression before" } */ + +#pragma acc wait (1,2 3) /* { dg-error "expected '\\)' before numeric constant" } */ + +#pragma acc wait (1,2,,) /* { dg-error "expected (primary-|)expression before" } */ + +#pragma acc wait (1 /* { dg-error "expected '\\\)' before end of line" } */ + /* { dg-error "expected integer expression before '\\\)'" "" { target c++ } 173 } */ + +#pragma acc wait (1,*) /* { dg-error "expected (primary-|)expression before" } */ + +#pragma acc wait (1,a) /* { dg-error "expression must be integral" } */ + +#pragma acc wait (a) /* { dg-error "expression must be integral" } */ + +#pragma acc wait (1.0) /* { dg-error "expression must be integral" } */ + +#pragma 
acc wait 1 /* { dg-error "expected '#pragma acc' clause before numeric constant" } */ + +#pragma acc wait N /* { dg-error "expected '#pragma acc' clause before 'N'" } */ + +#pragma acc wait async (1 2) /* { dg-error "expected '\\)' before numeric constant" } */ + +#pragma acc wait async (1 2) /* { dg-error "expected '\\)' before numeric constant" } */ + +#pragma acc wait async (1,) /* { dg-error "expected (primary-|)expression before" } */ + +#pragma acc wait async (,1) /* { dg-error "expected (primary-|)expression before" } */ + +#pragma acc wait async (1,2,) /* { dg-error "expected (primary-|)expression before" } */ + +#pragma acc wait async (1,2 3) /* { dg-error "expected '\\)' before numeric constant" } */ + +#pragma acc wait async (1,2,,) /* { dg-error "expected (primary-|)expression before" } */ + +#pragma acc wait async (1 /* { dg-error "expected '\\)' before end of line" } */ + +#pragma acc wait async (*) /* { dg-error "expected (primary-|)expression before " } */ + +#pragma acc wait async (a) + /* { dg-error "expected integer expression before" "" { target c } 206 } */ + /* { dg-error "expression must be integral" "" { target c++ } 206 } */ + +#pragma acc wait async (1.0) + /* { dg-error "expected integer expression before" "" { target c } 210 } */ + /* { dg-error "expression must be integral" "" { target c++ } 210 } */ +} diff --git a/gcc/testsuite/c-c++-common/goacc/cache-1.c b/gcc/testsuite/c-c++-common/goacc/cache-1.c new file mode 100644 index 00000000000..950334102db --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/cache-1.c @@ -0,0 +1,88 @@ +int +main (int argc, char **argv) +{ +#define N 2 + int a[N], b[N]; + int i; + + for (i = 0; i < N; i++) + { + a[i] = 3; + b[i] = 0; + } + +#pragma acc parallel copyin (a[0:N]) copyout (b[0:N]) +{ + int ii; + + for (ii = 0; ii < N; ii++) + { + const int idx = ii; + int n = 1; + const int len = n; + +#pragma acc cache /* { dg-error "expected '\\\(' before end of line" } */ + +#pragma acc cache a[0:N] /* { 
dg-error "expected '\\\(' before 'a'" } */ + /* { dg-bogus "expected end of line before 'a'" "" { xfail c++ } 26 } */ + +#pragma acc cache (a) /* { dg-error "expected '\\\['" } */ + +#pragma acc cache ( /* { dg-error "expected (identifier|unqualified-id) before end of line" } */ + +#pragma acc cache () /* { dg-error "expected (identifier|unqualified-id) before '\\\)' token" } */ + +#pragma acc cache (,) /* { dg-error "expected (identifier|unqualified-id) before '(,|\\\))' token" } */ + +#pragma acc cache (a[0:N] /* { dg-error "expected '\\\)' before end of line" } */ + +#pragma acc cache (a[0:N],) /* { dg-error "expected (identifier|unqualified-id) before '(,|\\\))' token" "" { xfail c } } */ + +#pragma acc cache (a[0:N]) copyin (a[0:N]) /* { dg-error "expected end of line before 'copyin'" } */ + +#pragma acc cache () /* { dg-error "expected (identifier|unqualified-id) before '\\\)' token" } */ + +#pragma acc cache (a[0:N] b[0:N]) /* { dg-error "expected '\\\)' before 'b'" } */ + +#pragma acc cache (a[0:N] b[0:N}) /* { dg-error "expected '\\\)' before 'b'" } */ + /* { dg-bogus "expected end of line before '\\\}' token" "" { xfail c++ } 47 } */ + +#pragma acc cache (a[0:N] /* { dg-error "expected '\\\)' before end of line" } */ + +#pragma acc cache (a[ii]) /* { dg-error "'ii' is not a constant" } */ + +#pragma acc cache (a[idx:n]) /* { dg-error "'n' is not a constant" } */ + +#pragma acc cache (a[0:N]) ( /* { dg-error "expected end of line before '\\(' token" } */ + +#pragma acc cache (a[0:N]) ii /* { dg-error "expected end of line before 'ii'" } */ + +#pragma acc cache (a[0:N] ii) /* { dg-error "expected '\\)' before 'ii'" } */ + +#pragma acc cache (a[0:N]) + +#pragma acc cache (a[0:N], a[0:N]) + +#pragma acc cache (a[0:N], b[0:N]) + +#pragma acc cache (a[0]) + +#pragma acc cache (a[0], a[1], b[0:N]) + +#pragma acc cache (a[idx]) + +#pragma acc cache (a[idx:len]) + + b[ii] = a[ii]; + } +} + + + for (i = 0; i < N; i++) + { + if (a[i] != b[i]) + __builtin_abort (); + 
} + + return 0; +} diff --git a/gcc/testsuite/c-c++-common/goacc/clauses-fail.c b/gcc/testsuite/c-c++-common/goacc/clauses-fail.c new file mode 100644 index 00000000000..899018026d3 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/clauses-fail.c @@ -0,0 +1,18 @@ +void +f (void) +{ + int i; + +#pragma acc parallel one /* { dg-error "expected '#pragma acc' clause before 'one'" } */ + ; + +#pragma acc kernels eins /* { dg-error "expected '#pragma acc' clause before 'eins'" } */ + ; + +#pragma acc data two /* { dg-error "expected '#pragma acc' clause before 'two'" } */ + ; + +#pragma acc loop deux /* { dg-error "expected '#pragma acc' clause before 'deux'" } */ + for (i = 0; i < 2; ++i) + ; +} diff --git a/gcc/testsuite/c-c++-common/goacc/collapse-1.c b/gcc/testsuite/c-c++-common/goacc/collapse-1.c new file mode 100644 index 00000000000..11b14383983 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/collapse-1.c @@ -0,0 +1,97 @@ +/* { dg-skip-if "not yet" { c++ } } */ + +int i, j, k; +extern int foo (void); + +void +f1 (void) +{ + #pragma acc parallel + #pragma acc loop collapse (2) + for (i = 0; i < 5; i++) + ; /* { dg-error "not enough perfectly nested" } */ + { + for (j = 0; j < 5; j++) + ; + } +} + +void +f2 (void) +{ + #pragma acc parallel + #pragma acc loop collapse (2) + for (i = 0; i < 5; i++) + { + { + { + for (j = 0; j < 5; j++) + { + } + } + } + } +} + +void +f3 (void) +{ + #pragma acc parallel + #pragma acc loop collapse (2) + for (i = 0; i < 5; i++) + { + int k = foo (); /* { dg-error "not enough perfectly nested" } */ + { + { + for (j = 0; j < 5; j++) + { + } + } + } + } +} + +void +f4 (void) +{ + #pragma acc parallel + #pragma acc loop collapse (2) + for (i = 0; i < 5; i++) + { + { + for (j = 0; j < 5; j++) + ; + foo (); /* { dg-error "collapsed loops not perfectly nested before" } */ + } + } +} + +void +f5 (void) +{ + #pragma acc parallel + #pragma acc loop collapse (2) + for (i = 0; i < 5; i++) + { + { + for (j = 0; j < 5; j++) + ; + } + foo (); 
/* { dg-error "collapsed loops not perfectly nested before" } */ + } +} + +void +f6 (void) +{ + #pragma acc parallel + #pragma acc loop collapse (2) + for (i = 0; i < 5; i++) + { + { + for (j = 0; j < 5; j++) + ; + } + } + foo (); +} diff --git a/gcc/testsuite/c-c++-common/goacc/data-1.c b/gcc/testsuite/c-c++-common/goacc/data-1.c new file mode 100644 index 00000000000..8094575de19 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/data-1.c @@ -0,0 +1,6 @@ +void +foo (void) +{ +#pragma acc data + ; +} diff --git a/gcc/testsuite/c-c++-common/goacc/data-2.c b/gcc/testsuite/c-c++-common/goacc/data-2.c new file mode 100644 index 00000000000..a67d8a4d789 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/data-2.c @@ -0,0 +1,21 @@ +void +foo (void) +{ + int a, b[100]; + int n; +#pragma acc enter data copyin (a, b) async wait +#pragma acc enter data create (b[20:30]) async wait +#pragma acc enter data (a) /* { dg-error "expected '#pragma acc' clause before '\\\(' token" } */ +#pragma acc enter data create (b(1:10)) /* { dg-error "expected '\\\)' before '\\\(' token" } */ +#pragma acc exit data delete (a) if (0) +#pragma acc exit data copyout (b) if (a) +#pragma acc exit data delete (b) +#pragma acc enter /* { dg-error "expected 'data' in" } */ +#pragma acc exit /* { dg-error "expected 'data' in" } */ +#pragma acc enter data /* { dg-error "has no data movement clause" } */ +#pragma acc exit data /* { dg-error "has no data movement clause" } */ +#pragma acc enter Data /* { dg-error "invalid pragma before" } */ +#pragma acc exit copyout (b) /* { dg-error "invalid pragma before" } */ +} + +/* { dg-error "has no data movement clause" "" { target *-*-* } 8 } */ diff --git a/gcc/testsuite/c-c++-common/goacc/data-clause-duplicate-1.c b/gcc/testsuite/c-c++-common/goacc/data-clause-duplicate-1.c new file mode 100644 index 00000000000..7a1cf683f24 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/data-clause-duplicate-1.c @@ -0,0 +1,13 @@ +void +fun (void) +{ + float *fp; 
+#pragma acc parallel copy(fp[0:2],fp[0:2]) /* { dg-error "'fp' appears more than once in map clauses" } */ + ; +#pragma acc kernels present_or_copyin(fp[3]) present_or_copyout(fp[7:4]) /* { dg-error "'fp' appears more than once in map clauses" } */ + ; +#pragma acc data create(fp[:10]) deviceptr(fp) /* { dg-error "'fp' appears more than once in map clauses" } */ + ; +#pragma acc data create(fp) present(fp) /* { dg-error "'fp' appears more than once in map clauses" } */ + ; +} diff --git a/gcc/testsuite/c-c++-common/goacc/deviceptr-1.c b/gcc/testsuite/c-c++-common/goacc/deviceptr-1.c new file mode 100644 index 00000000000..546fa82958e --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/deviceptr-1.c @@ -0,0 +1,86 @@ +/* { dg-skip-if "not yet" { c++ } } */ + +void +fun1 (void) +{ +#pragma acc parallel deviceptr(u) /* { dg-error "'u' undeclared" } */ + ; +#pragma acc kernels deviceptr(u[0:4]) /* { dg-error "expected '\\\)' before '\\\[' token" } */ + ; + +#pragma acc data deviceptr(fun1) /* { dg-error "'fun1' is not a variable" } */ + ; +#pragma acc parallel deviceptr(fun1[2:5]) + /* { dg-error "'fun1' is not a variable" "not a variable" { target *-*-* } 13 } */ + /* { dg-error "expected '\\\)' before '\\\[' token" "array" { target *-*-* } 13 } */ + ; + + int i; +#pragma acc kernels deviceptr(i) /* { dg-error "'i' is not a pointer variable" } */ + ; +#pragma acc data deviceptr(i[0:4]) + /* { dg-error "'i' is not a pointer variable" "not a pointer variable" { target *-*-* } 21 } */ + /* { dg-error "expected '\\\)' before '\\\[' token" "array" { target *-*-* } 21 } */ + ; + + float fa[10]; +#pragma acc parallel deviceptr(fa) /* { dg-error "'fa' is not a pointer variable" } */ + ; +#pragma acc kernels deviceptr(fa[1:5]) + /* { dg-error "'fa' is not a pointer variable" "not a pointer variable" { target *-*-* } 29 } */ + /* { dg-error "expected '\\\)' before '\\\[' token" "array" { target *-*-* } 29 } */ + ; + + float *fp; +#pragma acc data deviceptr(fp) + ; +#pragma acc 
parallel deviceptr(fp[0:4]) /* { dg-error "expected '\\\)' before '\\\[' token" } */ + ; +} + +void +fun2 (void) +{ + int i; + float *fp; +#pragma acc kernels deviceptr(fp,u,fun2,i,fp) + /* { dg-error "'u' undeclared" "u undeclared" { target *-*-* } 46 } */ + /* { dg-error "'fun2' is not a variable" "fun2 not a variable" { target *-*-* } 46 } */ + /* { dg-error "'i' is not a pointer variable" "i not a pointer variable" { target *-*-* } 46 } */ + /* { dg-error "'fp' appears more than once in map clauses" "fp more than once" { target *-*-* } 46 } */ + ; +} + +void +fun3 (void) +{ + float *fp; +#pragma acc data deviceptr(fp,fp) /* { dg-error "'fp' appears more than once in map clauses" } */ + ; +#pragma acc parallel deviceptr(fp) deviceptr(fp) /* { dg-error "'fp' appears more than once in map clauses" } */ + ; +#pragma acc kernels copy(fp) deviceptr(fp) /* { dg-error "'fp' appears more than once in map clauses" } */ + ; +} + +extern struct s s1; +extern struct s s2[1]; /* { dg-error "array type has incomplete element type" "" { target c } } */ + +void +fun4 (void) +{ + struct s *s1_p = &s1; + struct s *s2_p = &s2; + +#pragma acc parallel deviceptr(s1) /* { dg-error "'s1' is not a pointer variable" } */ + ; + +#pragma acc parallel deviceptr(s2) + ; + +#pragma acc parallel deviceptr(s1_p) + s1_p = 0; + +#pragma acc parallel deviceptr(s2_p) + s2_p = 0; +} diff --git a/gcc/testsuite/c-c++-common/goacc/deviceptr-2.c b/gcc/testsuite/c-c++-common/goacc/deviceptr-2.c new file mode 100644 index 00000000000..ac162b4989b --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/deviceptr-2.c @@ -0,0 +1,23 @@ +void +fun1 (void) +{ + char *a = 0; + +#pragma acc data deviceptr(a) + ++a; + +#pragma acc data deviceptr(a) +#pragma acc parallel + ++a; + +#pragma acc data deviceptr(a) +#pragma acc parallel deviceptr(a) + ++a; + +#pragma acc data +#pragma acc parallel deviceptr(a) + ++a; + +#pragma acc parallel deviceptr(a) + ++a; +} diff --git a/gcc/testsuite/c-c++-common/goacc/deviceptr-3.c 
b/gcc/testsuite/c-c++-common/goacc/deviceptr-3.c new file mode 100644 index 00000000000..bab56c3e652 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/deviceptr-3.c @@ -0,0 +1,11 @@ +float *d_a; + +void +f (float *a) +{ +#pragma acc parallel copyout (a[3:10]) deviceptr (d_a) + d_a[2] += 1.0; + +#pragma acc parallel deviceptr (d_a) copyout (a[3:10]) + d_a[2] += 1.0; +} diff --git a/gcc/testsuite/c-c++-common/goacc/if-clause-1.c b/gcc/testsuite/c-c++-common/goacc/if-clause-1.c new file mode 100644 index 00000000000..85abf1659e9 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/if-clause-1.c @@ -0,0 +1,10 @@ +/* { dg-skip-if "not yet" { c++ } } */ + +void +f (void) +{ + struct { int i; } *p; +#pragma acc data copyout(p) if(1) if(1) /* { dg-error "too many 'if' clauses" } */ + ; +#pragma acc update device(p) if(*p) /* { dg-error "used struct type value where scalar is required" } */ +} diff --git a/gcc/testsuite/c-c++-common/goacc/if-clause-2.c b/gcc/testsuite/c-c++-common/goacc/if-clause-2.c new file mode 100644 index 00000000000..5ab8459d732 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/if-clause-2.c @@ -0,0 +1,11 @@ +void +f (short c) +{ +#pragma acc parallel if(c) + ; +#pragma acc kernels if(c) + ; +#pragma acc data if(c) + ; +#pragma acc update device(c) if(c) +} diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-1.c b/gcc/testsuite/c-c++-common/goacc/kernels-1.c new file mode 100644 index 00000000000..e91b81c8d04 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-1.c @@ -0,0 +1,6 @@ +void +foo (void) +{ +#pragma acc kernels + ; +} diff --git a/gcc/testsuite/c-c++-common/goacc/loop-1.c b/gcc/testsuite/c-c++-common/goacc/loop-1.c new file mode 100644 index 00000000000..fea40e0ab61 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/loop-1.c @@ -0,0 +1,72 @@ +/* { dg-skip-if "not yet" { c++ } } */ + +int test1() +{ + int i, j, k, b[10]; + int a[30]; + double d; + float r; + i = 0; + #pragma acc loop + while(1) /* { dg-error "for statement 
expected" } */ + { + if (i > 0) break; + i = i + 1; + } + i = 0; + #pragma acc loop + for(;;) /* { dg-error "expected iteration declaration or initialization" } */ + { + if (i > 0) break; /* { dg-error "break statement used" } */ + i = i + 1; + } + i = 0; + #pragma acc loop + do /* { dg-error "for statement expected" } */ + { + i = i + 1; + } + while (i < 4); + #pragma acc loop + while (i < 8) /* { dg-error "for statement expected" } */ + { + i = i + 1; + } + #pragma acc loop + for (d = 1; d < 30; d+= 6) /* { dg-error "invalid type for iteration variable" } */ + { + i = d; + a[i] = 1; + } + #pragma acc loop + for (i = 1; i < 30; i++ ) + if (i == 16) break; /* { dg-error "break statement used" } */ + +/* different types of for loop are allowed */ + #pragma acc loop + for (i = 1; i < 10; i++) + { + } + #pragma acc loop + for (i = 1; i < 10; i+=2) + { + a[i] = i; + } + + /* after loop directive must be loop */ + #pragma acc loop + a[1] = 1; /* { dg-error "for statement expected" } */ + for (i = 1; i < 10; i++) + ; + /* combined directives may be used*/ + #pragma acc parallel loop + for(i = 1; i < 10; i++) + { + } + #pragma acc kernels loop + for(i = 1; i < 10; i++) + { + } + return 0; +} +/* { dg-prune-output "sorry, unimplemented: directive not yet implemented" } */ diff --git a/gcc/testsuite/c-c++-common/goacc/loop-private-1.c b/gcc/testsuite/c-c++-common/goacc/loop-private-1.c new file mode 100644 index 00000000000..a54edb2c05f --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/loop-private-1.c @@ -0,0 +1,14 @@ +/* { dg-additional-options "-fdump-tree-gimple" } */ + +void +f (int i, int j) +{ +#pragma acc kernels +#pragma acc loop collapse(2) + for (i = 0; i < 20; ++i) + for (j = 0; j < 25; ++j) + ; +} + +/* { dg-final { scan-tree-dump-times "#pragma acc loop collapse\\(2\\) private\\(j\\) private\\(i\\)" 1 "gimple" } } */ +/* { dg-final { cleanup-tree-dump "gimple" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/nesting-1.c 
b/gcc/testsuite/c-c++-common/goacc/nesting-1.c new file mode 100644 index 00000000000..b4b863fb860 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/nesting-1.c @@ -0,0 +1,101 @@ +extern int i; + +void +f_acc_parallel (void) +{ +#pragma acc parallel + { +#pragma acc loop + for (i = 0; i < 2; ++i) + ; + } +} + + +void +f_acc_kernels (void) +{ +#pragma acc kernels + { +#pragma acc loop + for (i = 0; i < 2; ++i) + ; + } +} + + +void +f_acc_data (void) +{ +#pragma acc data + { +#pragma acc parallel + ; + +#pragma acc parallel + { +#pragma acc loop + for (i = 0; i < 2; ++i) + ; + } + +#pragma acc kernels + ; + +#pragma acc kernels + { +#pragma acc loop + for (i = 0; i < 2; ++i) + ; + } + +#pragma acc data + ; + +#pragma acc update host(i) + +#pragma acc enter data copyin(i) + +#pragma acc exit data delete(i) + +#pragma acc loop + for (i = 0; i < 2; ++i) + ; + +#pragma acc data + { +#pragma acc parallel + ; + +#pragma acc parallel + { +#pragma acc loop + for (i = 0; i < 2; ++i) + ; + } + +#pragma acc kernels + ; + +#pragma acc kernels + { +#pragma acc loop + for (i = 0; i < 2; ++i) + ; + } + +#pragma acc data + ; + +#pragma acc update host(i) + +#pragma acc enter data copyin(i) + +#pragma acc exit data delete(i) + +#pragma acc loop + for (i = 0; i < 2; ++i) + ; + } + } +} diff --git a/gcc/testsuite/c-c++-common/goacc/nesting-data-1.c b/gcc/testsuite/c-c++-common/goacc/nesting-data-1.c new file mode 100644 index 00000000000..fefe6cd16f7 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/nesting-data-1.c @@ -0,0 +1,61 @@ +void +f (void) +{ + unsigned char c, ca[15], caa[20][30]; + +#pragma acc data copyin(c) + { + c = 5; + ca[3] = c; + caa[3][12] = ca[3] + caa[3][12]; + +#pragma acc data copyin(ca[2:4]) + { + c = 6; + ca[4] = c; + caa[3][12] = ca[3] + caa[3][12]; + } + +#pragma acc parallel copyout(ca[3:4]) + { + c = 7; + ca[5] = c; + caa[3][12] = ca[3] + caa[3][12]; + } + +#pragma acc kernels copy(ca[4:4]) + { + c = 8; + ca[6] = c; + caa[3][12] = ca[3] + caa[3][12]; 
+ } + +#pragma acc data pcopy(ca[5:7]) + { + c = 15; + ca[7] = c; + caa[3][12] = ca[3] + caa[3][12]; + +#pragma acc data pcopyin(caa[3:7][0:30]) + { + c = 16; + ca[8] = c; + caa[3][12] = ca[3] + caa[3][12]; + } + +#pragma acc parallel pcopyout(caa[3:7][0:30]) + { + c = 17; + ca[9] = c; + caa[3][12] = ca[3] + caa[3][12]; + } + +#pragma acc kernels pcopy(caa[3:7][0:30]) + { + c = 18; + ca[10] = c; + caa[3][12] = ca[3] + caa[3][12]; + } + } + } +} diff --git a/gcc/testsuite/c-c++-common/goacc/nesting-fail-1.c b/gcc/testsuite/c-c++-common/goacc/nesting-fail-1.c new file mode 100644 index 00000000000..8af1c8244f0 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/nesting-fail-1.c @@ -0,0 +1,39 @@ +extern int i; + +/* While the OpenACC specification does allow for certain kinds of + nesting, we don't support many of these yet. */ +void +f_acc_parallel (void) +{ +#pragma acc parallel + { +#pragma acc parallel /* { dg-bogus "parallel construct inside of parallel region" "not implemented" { xfail *-*-* } } */ + ; +#pragma acc kernels /* { dg-bogus "kernels construct inside of parallel region" "not implemented" { xfail *-*-* } } */ + ; +#pragma acc data /* { dg-error "data construct inside of parallel region" } */ + ; +#pragma acc update host(i) /* { dg-error "update construct inside of parallel region" } */ +#pragma acc enter data copyin(i) /* { dg-error "enter/exit data construct inside of parallel region" } */ +#pragma acc exit data delete(i) /* { dg-error "enter/exit data construct inside of parallel region" } */ + } +} + +/* While the OpenACC specification does allow for certain kinds of + nesting, we don't support many of these yet. 
*/ +void +f_acc_kernels (void) +{ +#pragma acc kernels + { +#pragma acc parallel /* { dg-bogus "parallel construct inside of kernels region" "not implemented" { xfail *-*-* } } */ + ; +#pragma acc kernels /* { dg-bogus "kernels construct inside of kernels region" "not implemented" { xfail *-*-* } } */ + ; +#pragma acc data /* { dg-error "data construct inside of kernels region" } */ + ; +#pragma acc update host(i) /* { dg-error "update construct inside of kernels region" } */ +#pragma acc enter data copyin(i) /* { dg-error "enter/exit data construct inside of kernels region" } */ +#pragma acc exit data delete(i) /* { dg-error "enter/exit data construct inside of kernels region" } */ + } +} diff --git a/gcc/testsuite/c-c++-common/goacc/parallel-1.c b/gcc/testsuite/c-c++-common/goacc/parallel-1.c new file mode 100644 index 00000000000..a8605266747 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/parallel-1.c @@ -0,0 +1,6 @@ +void +foo (void) +{ +#pragma acc parallel + ; +} diff --git a/gcc/testsuite/c-c++-common/goacc/pcopy.c b/gcc/testsuite/c-c++-common/goacc/pcopy.c new file mode 100644 index 00000000000..fd16525517d --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/pcopy.c @@ -0,0 +1,11 @@ +/* { dg-additional-options "-fdump-tree-original" } */ + +void +f (char *cp) +{ +#pragma acc parallel pcopy(cp[3:5]) + ; +} + +/* { dg-final { scan-tree-dump-times "#pragma acc parallel map\\(tofrom:\\*\\(cp \\+ 3\\) \\\[len: 5]\\) map\\(alloc:cp \\\[pointer assign, bias: 3]\\)" 1 "original" } } */ +/* { dg-final { cleanup-tree-dump "original" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/pcopyin.c b/gcc/testsuite/c-c++-common/goacc/pcopyin.c new file mode 100644 index 00000000000..c009d24101e --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/pcopyin.c @@ -0,0 +1,11 @@ +/* { dg-additional-options "-fdump-tree-original" } */ + +void +f (char *cp) +{ +#pragma acc parallel pcopyin(cp[4:6]) + ; +} + +/* { dg-final { scan-tree-dump-times "#pragma acc parallel 
map\\(to:\\*\\(cp \\+ 4\\) \\\[len: 6]\\) map\\(alloc:cp \\\[pointer assign, bias: 4]\\)" 1 "original" } } */ +/* { dg-final { cleanup-tree-dump "original" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/pcopyout.c b/gcc/testsuite/c-c++-common/goacc/pcopyout.c new file mode 100644 index 00000000000..6099effd8f4 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/pcopyout.c @@ -0,0 +1,11 @@ +/* { dg-additional-options "-fdump-tree-original" } */ + +void +f (char *cp) +{ +#pragma acc parallel pcopyout(cp[5:7]) + ; +} + +/* { dg-final { scan-tree-dump-times "#pragma acc parallel map\\(from:\\*\\(cp \\+ 5\\) \\\[len: 7]\\) map\\(alloc:cp \\\[pointer assign, bias: 5]\\)" 1 "original" } } */ +/* { dg-final { cleanup-tree-dump "original" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/pcreate.c b/gcc/testsuite/c-c++-common/goacc/pcreate.c new file mode 100644 index 00000000000..2f6e836e6fb --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/pcreate.c @@ -0,0 +1,11 @@ +/* { dg-additional-options "-fdump-tree-original" } */ + +void +f (char *cp) +{ +#pragma acc parallel pcreate(cp[6:8]) + ; +} + +/* { dg-final { scan-tree-dump-times "#pragma acc parallel map\\(alloc:\\*\\(cp \\+ 6\\) \\\[len: 8]\\) map\\(alloc:cp \\\[pointer assign, bias: 6]\\)" 1 "original" } } */ +/* { dg-final { cleanup-tree-dump "original" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/pragma_context.c b/gcc/testsuite/c-c++-common/goacc/pragma_context.c new file mode 100644 index 00000000000..680dc9bba0e --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/pragma_context.c @@ -0,0 +1,34 @@ +/* { dg-skip-if "not yet" { c++ } } */ + +// pragma_external +#pragma acc update /* { dg-error "expected declaration specifiers before '#pragma'" } */ + +// pragma_struct +struct s_pragma_struct +{ +#pragma acc update /* { dg-error "expected declaration specifiers before '#pragma'" } */ +}; + +// pragma_param +void +f_pragma_param ( +#pragma acc update /* { dg-error "expected declaration specifiers 
before '#pragma'" } */ + void) +{ +} + +// pragma_stmt +void +f2 (void) +{ + if (0) +#pragma acc update /* { dg-error "'#pragma acc update' may only be used in compound statements before '#pragma'" } */ +} + +// pragma_compound +void +f3 (void) +{ + int i = 0; +#pragma acc update device(i) +} diff --git a/gcc/testsuite/c-c++-common/goacc/present-1.c b/gcc/testsuite/c-c++-common/goacc/present-1.c new file mode 100644 index 00000000000..03ee5921441 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/present-1.c @@ -0,0 +1,11 @@ +/* { dg-additional-options "-fdump-tree-original" } */ + +void +f (char *cp) +{ +#pragma acc parallel present(cp[7:9]) + ; +} + +/* { dg-final { scan-tree-dump-times "#pragma acc parallel map\\(force_present:\\*\\(cp \\+ 7\\) \\\[len: 9]\\) map\\(alloc:cp \\\[pointer assign, bias: 7]\\)" 1 "original" } } */ +/* { dg-final { cleanup-tree-dump "original" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-1.c b/gcc/testsuite/c-c++-common/goacc/reduction-1.c new file mode 100644 index 00000000000..0f500829831 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/reduction-1.c @@ -0,0 +1,71 @@ +/* Integer reductions. */ + +#define vl 32 + +int +main(void) +{ + const int n = 1000; + int i; + int result, array[n]; + int lresult; + + /* '+' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (+:result) + for (i = 0; i < n; i++) + result += array[i]; + + /* '*' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (*:result) + for (i = 0; i < n; i++) + result *= array[i]; + +// result = 0; +// vresult = 0; +// +// /* 'max' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result > array[i] ? result : array[i]; +// +// /* 'min' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result < array[i] ? 
result : array[i]; + + /* '&' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&:result) + for (i = 0; i < n; i++) + result &= array[i]; + + /* '|' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (|:result) + for (i = 0; i < n; i++) + result |= array[i]; + + /* '^' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (^:result) + for (i = 0; i < n; i++) + result ^= array[i]; + + /* '&&' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&&:lresult) + for (i = 0; i < n; i++) + lresult = lresult && (result > array[i]); + + /* '||' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (||:lresult) + for (i = 0; i < n; i++) + lresult = lresult || (result > array[i]); + + return 0; +} diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-2.c b/gcc/testsuite/c-c++-common/goacc/reduction-2.c new file mode 100644 index 00000000000..1f95138ff0b --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/reduction-2.c @@ -0,0 +1,50 @@ +/* float reductions. */ + +#define vl 32 + +int +main(void) +{ + const int n = 1000; + int i; + float result, array[n]; + int lresult; + + /* '+' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (+:result) + for (i = 0; i < n; i++) + result += array[i]; + + /* '*' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (*:result) + for (i = 0; i < n; i++) + result *= array[i]; + +// /* 'max' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result > array[i] ? result : array[i]; +// +// /* 'min' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result < array[i] ? result : array[i]; + + /* '&&' reductions. 
*/ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&&:lresult) + for (i = 0; i < n; i++) + lresult = lresult && (result > array[i]); + + /* '||' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (||:lresult) + for (i = 0; i < n; i++) + lresult = lresult || (result > array[i]); + + return 0; +} diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-3.c b/gcc/testsuite/c-c++-common/goacc/reduction-3.c new file mode 100644 index 00000000000..476e375c654 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/reduction-3.c @@ -0,0 +1,50 @@ +/* double reductions. */ + +#define vl 32 + +int +main(void) +{ + const int n = 1000; + int i; + double result, array[n]; + int lresult; + + /* '+' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (+:result) + for (i = 0; i < n; i++) + result += array[i]; + + /* '*' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (*:result) + for (i = 0; i < n; i++) + result *= array[i]; + +// /* 'max' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result > array[i] ? result : array[i]; +// +// /* 'min' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result < array[i] ? result : array[i]; + + /* '&&' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&&:lresult) + for (i = 0; i < n; i++) + lresult = lresult && (result > array[i]); + + /* '||' reductions. 
*/ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (||:lresult) + for (i = 0; i < n; i++) + lresult = lresult || (result > array[i]); + + return 0; +} diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-4.c b/gcc/testsuite/c-c++-common/goacc/reduction-4.c new file mode 100644 index 00000000000..73dde869a9d --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/reduction-4.c @@ -0,0 +1,52 @@ +/* complex reductions. */ + +#define vl 32 + +int +main(void) +{ + const int n = 1000; + int i; + __complex__ double result, array[n]; + int lresult; + + /* '+' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (+:result) + for (i = 0; i < n; i++) + result += array[i]; + + /* Needs support for complex multiplication. */ + +// /* '*' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (*:result) +// for (i = 0; i < n; i++) +// result *= array[i]; +// +// /* 'max' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result > array[i] ? result : array[i]; +// +// /* 'min' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result < array[i] ? result : array[i]; + + /* '&&' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&&:lresult) + for (i = 0; i < n; i++) + lresult = lresult && (__real__(result) > __real__(array[i])); + + /* '||' reductions. 
*/ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (||:lresult) + for (i = 0; i < n; i++) + lresult = lresult || (__real__(result) > __real__(array[i])); + + return 0; +} diff --git a/gcc/testsuite/c-c++-common/goacc/sb-1.c b/gcc/testsuite/c-c++-common/goacc/sb-1.c new file mode 100644 index 00000000000..5e55c9516f2 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/sb-1.c @@ -0,0 +1,75 @@ +// { dg-skip-if "not yet" { c++ } } + +void foo() +{ + int l; + + bad1: + #pragma acc parallel + goto bad1; // { dg-error "invalid branch to/from OpenACC structured block" } + #pragma acc kernels + goto bad1; // { dg-error "invalid branch to/from OpenACC structured block" } + #pragma acc data + goto bad1; // { dg-error "invalid branch to/from OpenACC structured block" } + #pragma acc loop + for (l = 0; l < 2; ++l) + goto bad1; // { dg-error "invalid branch to/from OpenACC structured block" } + + goto bad2_parallel; // { dg-error "invalid entry to OpenACC structured block" } + #pragma acc parallel + { + bad2_parallel: ; + } + + goto bad2_kernels; // { dg-error "invalid entry to OpenACC structured block" } + #pragma acc kernels + { + bad2_kernels: ; + } + + goto bad2_data; // { dg-error "invalid entry to OpenACC structured block" } + #pragma acc data + { + bad2_data: ; + } + + goto bad2_loop; // { dg-error "invalid entry to OpenACC structured block" } + #pragma acc loop + for (l = 0; l < 2; ++l) + { + bad2_loop: ; + } + + #pragma acc parallel + { + int i; + goto ok1_parallel; + for (i = 0; i < 10; ++i) + { ok1_parallel: break; } + } + + #pragma acc kernels + { + int i; + goto ok1_kernels; + for (i = 0; i < 10; ++i) + { ok1_kernels: break; } + } + + #pragma acc data + { + int i; + goto ok1_data; + for (i = 0; i < 10; ++i) + { ok1_data: break; } + } + + #pragma acc loop + for (l = 0; l < 2; ++l) + { + int i; + goto ok1_loop; + for (i = 0; i < 10; ++i) + { ok1_loop: break; } + } +} diff --git a/gcc/testsuite/c-c++-common/goacc/sb-2.c 
b/gcc/testsuite/c-c++-common/goacc/sb-2.c new file mode 100644 index 00000000000..a6760ec73f8 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/sb-2.c @@ -0,0 +1,22 @@ +// { dg-skip-if "not yet" { c++ } } + +void foo(int i) +{ + switch (i) // { dg-error "invalid entry to OpenACC structured block" } + { + #pragma acc parallel + { case 0:; } + } + + switch (i) // { dg-error "invalid entry to OpenACC structured block" } + { + #pragma acc kernels + { case 0:; } + } + + switch (i) // { dg-error "invalid entry to OpenACC structured block" } + { + #pragma acc data + { case 0:; } + } +} diff --git a/gcc/testsuite/c-c++-common/goacc/sb-3.c b/gcc/testsuite/c-c++-common/goacc/sb-3.c new file mode 100644 index 00000000000..147b7b0e845 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/sb-3.c @@ -0,0 +1,18 @@ +// { dg-skip-if "not yet" { c++ } } + +void f (void) +{ + int i, j; +#pragma acc loop + for(i = 1; i < 30; i++) + { + if (i == 7) goto out; // { dg-error "invalid branch to/from OpenACC structured block" } +#pragma acc loop // { dg-error "work-sharing region may not be closely nested inside of work-sharing, critical, ordered, master or explicit task region" } + for(j = 5; j < 10; j++) + { + if (i == 6 && j == 7) goto out; // { dg-error "invalid branch to/from OpenACC structured block" } + } + } + out: + ; +} diff --git a/gcc/testsuite/c-c++-common/goacc/update-1.c b/gcc/testsuite/c-c++-common/goacc/update-1.c new file mode 100644 index 00000000000..97e93794934 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/update-1.c @@ -0,0 +1,17 @@ +void +f (void) +{ +#pragma acc update /* { dg-error "'#pragma acc update' must contain at least one 'device' or 'host/self' clause" } */ + + int i = 0; + int a[10]; +#pragma acc update device(i) +#pragma acc update host(i) +#pragma acc update self(i) +#pragma acc update device(a[1:3]) +#pragma acc update host(a[1:3]) +#pragma acc update self(a[1:3]) +#pragma acc update device(a(1:3)) /* { dg-error "expected '\\\)' before '\\\(' 
token" } */ +#pragma acc update host(a(1:3)) /* { dg-error "expected '\\\)' before '\\\(' token" } */ +#pragma acc update self(a(1:3)) /* { dg-error "expected '\\\)' before '\\\(' token" } */ +} diff --git a/gcc/testsuite/g++.dg/goacc-gomp/goacc-gomp.exp b/gcc/testsuite/g++.dg/goacc-gomp/goacc-gomp.exp new file mode 100644 index 00000000000..7e74d2ba063 --- /dev/null +++ b/gcc/testsuite/g++.dg/goacc-gomp/goacc-gomp.exp @@ -0,0 +1,36 @@ +# Copyright (C) 2006-2015 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Load support procs. +load_lib g++-dg.exp + +if { ![check_effective_target_fopenacc] \ + || ![check_effective_target_fopenmp] } { + return +} + +# Initialize `dg'. +dg-init + +# Main loop. +g++-dg-runtest [lsort [concat \ + [find $srcdir/$subdir *.C] \ + [find $srcdir/c-c++-common/goacc-gomp *.c]]] "" "-fopenacc -fopenmp" + +# All done. +dg-finish diff --git a/gcc/testsuite/g++.dg/goacc/goacc.exp b/gcc/testsuite/g++.dg/goacc/goacc.exp new file mode 100644 index 00000000000..0e96dfaafa7 --- /dev/null +++ b/gcc/testsuite/g++.dg/goacc/goacc.exp @@ -0,0 +1,35 @@ +# Copyright (C) 2006-2015 Free Software Foundation, Inc. +# +# This file is part of GCC.
+# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Load support procs. +load_lib g++-dg.exp + +if ![check_effective_target_fopenacc] { + return +} + +# Initialize `dg'. +dg-init + +# Main loop. +g++-dg-runtest [lsort [concat \ + [find $srcdir/$subdir *.C] \ + [find $srcdir/c-c++-common/goacc *.c]]] "" "-fopenacc" + +# All done. +dg-finish diff --git a/gcc/testsuite/g++.dg/gomp/block-1.C b/gcc/testsuite/g++.dg/gomp/block-1.C index 3ec6ac8e458..b0704ac7fa2 100644 --- a/gcc/testsuite/g++.dg/gomp/block-1.C +++ b/gcc/testsuite/g++.dg/gomp/block-1.C @@ -22,5 +22,5 @@ void foo() } } -// { dg-message "error: invalid branch to/from an OpenMP structured block" "" { target *-*-* } 7 } +// { dg-message "error: invalid branch to/from OpenMP structured block" "" { target *-*-* } 7 } // { dg-message "error: invalid entry to OpenMP structured block" "" { target *-*-* } 9 } diff --git a/gcc/testsuite/g++.dg/gomp/block-2.C b/gcc/testsuite/g++.dg/gomp/block-2.C index 7d572c17049..5336c5a30a1 100644 --- a/gcc/testsuite/g++.dg/gomp/block-2.C +++ b/gcc/testsuite/g++.dg/gomp/block-2.C @@ -32,5 +32,5 @@ void foo() continue; } -// { dg-message "error: invalid branch to/from an OpenMP structured block" "" { target *-*-* } 14 } +// { dg-message "error: invalid branch to/from OpenMP structured block" "" { target *-*-* } 14 } // { dg-message "error: invalid entry to OpenMP structured block" "" { target *-*-* } 16 } diff --git
a/gcc/testsuite/g++.dg/gomp/block-3.C b/gcc/testsuite/g++.dg/gomp/block-3.C index 4b98d1cdffc..23f97268edd 100644 --- a/gcc/testsuite/g++.dg/gomp/block-3.C +++ b/gcc/testsuite/g++.dg/gomp/block-3.C @@ -60,6 +60,6 @@ void foo() } } -// { dg-message "error: invalid branch to/from an OpenMP structured block" "" { target *-*-* } 21 } -// { dg-message "error: invalid branch to/from an OpenMP structured block" "" { target *-*-* } 26 } +// { dg-message "error: invalid branch to/from OpenMP structured block" "" { target *-*-* } 21 } +// { dg-message "error: invalid branch to/from OpenMP structured block" "" { target *-*-* } 26 } // { dg-message "error: invalid entry to OpenMP structured block" "" { target *-*-* } 31 } diff --git a/gcc/testsuite/g++.dg/gomp/block-5.C b/gcc/testsuite/g++.dg/gomp/block-5.C index 5023e3792ee..d6d28c4b4c3 100644 --- a/gcc/testsuite/g++.dg/gomp/block-5.C +++ b/gcc/testsuite/g++.dg/gomp/block-5.C @@ -15,4 +15,4 @@ void foo() } } -// { dg-message "error: invalid branch to/from an OpenMP structured block" "" { target *-*-* } 7 } +// { dg-message "error: invalid branch to/from OpenMP structured block" "" { target *-*-* } 7 } diff --git a/gcc/testsuite/g++.dg/gomp/target-1.C b/gcc/testsuite/g++.dg/gomp/target-1.C index ae2398804c7..bcdac61ee31 100644 --- a/gcc/testsuite/g++.dg/gomp/target-1.C +++ b/gcc/testsuite/g++.dg/gomp/target-1.C @@ -30,5 +30,5 @@ foo (int x) } } -// { dg-error "invalid branch to/from an OpenMP structured block" "" { target *-*-* } 8 } +// { dg-error "invalid branch to/from OpenMP structured block" "" { target *-*-* } 8 } // { dg-error "invalid entry to OpenMP structured block" "" { target *-*-* } 10 } diff --git a/gcc/testsuite/g++.dg/gomp/target-2.C b/gcc/testsuite/g++.dg/gomp/target-2.C index 6bf8b189336..273f8d50fef 100644 --- a/gcc/testsuite/g++.dg/gomp/target-2.C +++ b/gcc/testsuite/g++.dg/gomp/target-2.C @@ -30,5 +30,5 @@ foo (int x, int y) } } -// { dg-error "invalid branch to/from an OpenMP structured block" "" { target 
*-*-* } 8 } +// { dg-error "invalid branch to/from OpenMP structured block" "" { target *-*-* } 8 } // { dg-error "invalid entry to OpenMP structured block" "" { target *-*-* } 10 } diff --git a/gcc/testsuite/g++.dg/gomp/taskgroup-1.C b/gcc/testsuite/g++.dg/gomp/taskgroup-1.C index c31aa61583f..e15d59d947a 100644 --- a/gcc/testsuite/g++.dg/gomp/taskgroup-1.C +++ b/gcc/testsuite/g++.dg/gomp/taskgroup-1.C @@ -30,5 +30,5 @@ foo (int x) } } -// { dg-error "invalid branch to/from an OpenMP structured block" "" { target *-*-* } 8 } +// { dg-error "invalid branch to/from OpenMP structured block" "" { target *-*-* } 8 } // { dg-error "invalid entry to OpenMP structured block" "" { target *-*-* } 10 } diff --git a/gcc/testsuite/g++.dg/gomp/teams-1.C b/gcc/testsuite/g++.dg/gomp/teams-1.C index 86abe12b658..2b00bb61473 100644 --- a/gcc/testsuite/g++.dg/gomp/teams-1.C +++ b/gcc/testsuite/g++.dg/gomp/teams-1.C @@ -64,7 +64,7 @@ bar (int x) } } -// { dg-error "invalid branch to/from an OpenMP structured block" "" { target *-*-* } 8 } +// { dg-error "invalid branch to/from OpenMP structured block" "" { target *-*-* } 8 } // { dg-error "invalid entry to OpenMP structured block" "" { target *-*-* } 10 } -// { dg-error "invalid branch to/from an OpenMP structured block" "" { target *-*-* } 39 } +// { dg-error "invalid branch to/from OpenMP structured block" "" { target *-*-* } 39 } // { dg-error "invalid entry to OpenMP structured block" "" { target *-*-* } 41 } diff --git a/gcc/testsuite/gcc.dg/cilk-plus/jump-openmp.c b/gcc/testsuite/gcc.dg/cilk-plus/jump-openmp.c index 95e6b2d4403..6adabf4ae06 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/jump-openmp.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/jump-openmp.c @@ -11,7 +11,7 @@ void foo() { a[i] = b[i]; if (c == 5) - return; /* { dg-error "invalid branch to/from a Cilk Plus structured block" } */ + return; /* { dg-error "invalid branch to/from Cilk Plus structured block" } */ } } @@ -31,7 +31,7 @@ void baz() { bad1: #pragma omp parallel - 
goto bad1; /* { dg-error "invalid branch to/from an OpenMP structured block" } */ + goto bad1; /* { dg-error "invalid branch to/from OpenMP structured block" } */ goto bad2; /* { dg-error "invalid entry to OpenMP structured block" } */ #pragma omp parallel diff --git a/gcc/testsuite/gcc.dg/cilk-plus/jump.c b/gcc/testsuite/gcc.dg/cilk-plus/jump.c index 9ec3293cc97..1ca886a645f 100644 --- a/gcc/testsuite/gcc.dg/cilk-plus/jump.c +++ b/gcc/testsuite/gcc.dg/cilk-plus/jump.c @@ -10,7 +10,7 @@ void foo() { a[i] = b[i]; if (c == 5) - return; /* { dg-error "invalid branch to.from a Cilk" } */ + return; /* { dg-error "invalid branch to/from Cilk Plus structured block" } */ } } @@ -23,5 +23,5 @@ void bar() a[i] = b[i]; } if (c == 6) - goto lab; /* { dg-error "invalid entry to Cilk Plus" } */ + goto lab; /* { dg-error "invalid entry to Cilk Plus structured block" } */ } diff --git a/gcc/testsuite/gcc.dg/goacc-gomp/goacc-gomp.exp b/gcc/testsuite/gcc.dg/goacc-gomp/goacc-gomp.exp new file mode 100644 index 00000000000..50365bdb309 --- /dev/null +++ b/gcc/testsuite/gcc.dg/goacc-gomp/goacc-gomp.exp @@ -0,0 +1,38 @@ +# Copyright (C) 2006-2015 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# GCC testsuite that uses the `dg.exp' driver. + +# Load support procs.
+load_lib gcc-dg.exp + +if { ![check_effective_target_fopenacc] \ + || ![check_effective_target_fopenmp] } { + return +} + +# Initialize `dg'. +dg-init + +# Main loop. +dg-runtest [lsort [concat \ + [find $srcdir/$subdir *.c] \ + [find $srcdir/c-c++-common/goacc-gomp *.c]]] "" "-fopenacc -fopenmp" + +# All done. +dg-finish diff --git a/gcc/testsuite/gcc.dg/goacc/acc_on_device-1.c b/gcc/testsuite/gcc.dg/goacc/acc_on_device-1.c new file mode 100644 index 00000000000..1a0276e04d2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/goacc/acc_on_device-1.c @@ -0,0 +1,20 @@ +/* Have to enable optimizations, as otherwise builtins won't be expanded. */ +/* { dg-additional-options "-O -fdump-rtl-expand -std=c89 -Wno-implicit-function-declaration" } */ + +int +f (void) +{ + int r = 0; + + r |= acc_on_device (); + r |= acc_on_device (1, 2); + r |= acc_on_device (3.14); + r |= acc_on_device ("hello"); + + return r; +} + +/* Unsuitable to be handled as a builtin, so we're expecting four calls. + { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]*\\\"acc_on_device" 4 "expand" } } */ + +/* { dg-final { cleanup-rtl-dump "expand" } } */ diff --git a/gcc/testsuite/gcc.dg/goacc/goacc.exp b/gcc/testsuite/gcc.dg/goacc/goacc.exp new file mode 100644 index 00000000000..dd8c424d683 --- /dev/null +++ b/gcc/testsuite/gcc.dg/goacc/goacc.exp @@ -0,0 +1,37 @@ +# Copyright (C) 2006-2015 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# GCC testsuite that uses the `dg.exp' driver. + +# Load support procs. +load_lib gcc-dg.exp + +if ![check_effective_target_fopenacc] { + return +} + +# Initialize `dg'. +dg-init + +# Main loop. +dg-runtest [lsort [concat \ + [find $srcdir/$subdir *.c] \ + [find $srcdir/c-c++-common/goacc *.c]]] "" "-fopenacc" + +# All done. +dg-finish diff --git a/gcc/testsuite/gcc.dg/gomp/block-1.c b/gcc/testsuite/gcc.dg/gomp/block-1.c index dd7fe7783a9..e67e6c3fce5 100644 --- a/gcc/testsuite/gcc.dg/gomp/block-1.c +++ b/gcc/testsuite/gcc.dg/gomp/block-1.c @@ -4,9 +4,9 @@ void foo() { bad1: #pragma omp parallel - goto bad1; // { dg-error "invalid branch" } + goto bad1; // { dg-error "invalid branch to/from OpenMP structured block" } - goto bad2; // { dg-error "invalid entry" } + goto bad2; // { dg-error "invalid entry to OpenMP structured block" } #pragma omp parallel { bad2: ; diff --git a/gcc/testsuite/gcc.dg/gomp/block-10.c b/gcc/testsuite/gcc.dg/gomp/block-10.c index 76ee3974508..69ae3c0cf6e 100644 --- a/gcc/testsuite/gcc.dg/gomp/block-10.c +++ b/gcc/testsuite/gcc.dg/gomp/block-10.c @@ -3,28 +3,28 @@ void foo(int i) { int j; - switch (i) // { dg-error "invalid entry" } + switch (i) // { dg-error "invalid entry to OpenMP structured block" } { #pragma omp parallel { case 0:; } } - switch (i) // { dg-error "invalid entry" } + switch (i) // { dg-error "invalid entry to OpenMP structured block" } { #pragma omp for for (j = 0; j < 10; ++ j) { case 1:; } } - switch (i) // { dg-error "invalid entry" } + switch (i) // { dg-error "invalid entry to OpenMP structured block" } { #pragma omp critical { case 2:; } } - switch (i) // { dg-error "invalid entry" } + switch (i) // { dg-error "invalid entry to OpenMP structured block" } { #pragma omp master { case 3:; } } - switch (i) // { dg-error "invalid entry" } + switch (i) // { dg-error "invalid entry to OpenMP 
structured block" } { #pragma omp sections { case 4:; @@ -32,7 +32,7 @@ void foo(int i) { case 5:; } } } - switch (i) // { dg-error "invalid entry" } + switch (i) // { dg-error "invalid entry to OpenMP structured block" } { #pragma omp ordered { default:; } diff --git a/gcc/testsuite/gcc.dg/gomp/block-2.c b/gcc/testsuite/gcc.dg/gomp/block-2.c index 4c56add570c..5c014633b67 100644 --- a/gcc/testsuite/gcc.dg/gomp/block-2.c +++ b/gcc/testsuite/gcc.dg/gomp/block-2.c @@ -11,9 +11,9 @@ void foo() bad1: #pragma omp for for (i = 0; i < 10; ++i) - goto bad1; // { dg-error "invalid branch" } + goto bad1; // { dg-error "invalid branch to/from OpenMP structured block" } - goto bad2; // { dg-error "invalid entry" } + goto bad2; // { dg-error "invalid entry to OpenMP structured block" } #pragma omp for for (i = 0; i < 10; ++i) { diff --git a/gcc/testsuite/gcc.dg/gomp/block-3.c b/gcc/testsuite/gcc.dg/gomp/block-3.c index b4530e9d06c..0b21cb3923c 100644 --- a/gcc/testsuite/gcc.dg/gomp/block-3.c +++ b/gcc/testsuite/gcc.dg/gomp/block-3.c @@ -9,7 +9,7 @@ void foo() { #pragma omp sections { - continue; // { dg-error "invalid branch" } + continue; // { dg-error "invalid branch to/from OpenMP structured block" } } } @@ -18,16 +18,16 @@ void foo() #pragma omp section { bad1: ; } #pragma omp section - goto bad1; // { dg-error "invalid branch" } + goto bad1; // { dg-error "invalid branch to/from OpenMP structured block" } } #pragma omp sections { - goto bad2; // { dg-error "invalid branch" } + goto bad2; // { dg-error "invalid branch to/from OpenMP structured block" } } bad2:; - goto bad3; // { dg-error "invalid entry" } + goto bad3; // { dg-error "invalid entry to OpenMP structured block" } #pragma omp sections { bad3: ; diff --git a/gcc/testsuite/gcc.dg/gomp/block-4.c b/gcc/testsuite/gcc.dg/gomp/block-4.c index 61f490c0033..b2ef9b1d604 100644 --- a/gcc/testsuite/gcc.dg/gomp/block-4.c +++ b/gcc/testsuite/gcc.dg/gomp/block-4.c @@ -4,6 +4,6 @@ void foo() { #pragma omp critical { - return; 
// { dg-error "invalid branch" } + return; // { dg-error "invalid branch to/from OpenMP structured block" } } } diff --git a/gcc/testsuite/gcc.dg/gomp/block-5.c b/gcc/testsuite/gcc.dg/gomp/block-5.c index 741049fddea..7f3b37c8d22 100644 --- a/gcc/testsuite/gcc.dg/gomp/block-5.c +++ b/gcc/testsuite/gcc.dg/gomp/block-5.c @@ -4,12 +4,12 @@ void foo() { #pragma omp master { - goto bad1; // { dg-error "invalid branch" } + goto bad1; // { dg-error "invalid branch to/from OpenMP structured block" } } #pragma omp master { bad1: - return; // { dg-error "invalid branch" } + return; // { dg-error "invalid branch to/from OpenMP structured block" } } } diff --git a/gcc/testsuite/gcc.dg/gomp/block-6.c b/gcc/testsuite/gcc.dg/gomp/block-6.c index 87e6392e5b3..fc9fdc84672 100644 --- a/gcc/testsuite/gcc.dg/gomp/block-6.c +++ b/gcc/testsuite/gcc.dg/gomp/block-6.c @@ -4,6 +4,6 @@ void foo() { #pragma omp ordered { - return; // { dg-error "invalid branch" } + return; // { dg-error "invalid branch to/from OpenMP structured block" } } } diff --git a/gcc/testsuite/gcc.dg/gomp/block-7.c b/gcc/testsuite/gcc.dg/gomp/block-7.c index 2bc1cdb5723..6219e7e4662 100644 --- a/gcc/testsuite/gcc.dg/gomp/block-7.c +++ b/gcc/testsuite/gcc.dg/gomp/block-7.c @@ -6,15 +6,15 @@ void foo() for (i = 0; i < 10; ++i) { #pragma omp for - for (j = ({ continue; 0; }); // { dg-error "invalid branch" } - j < ({ continue; 10; }); // { dg-error "invalid branch" } - j += ({ continue; 1; })) // { dg-error "invalid branch" } + for (j = ({ continue; 0; }); // { dg-error "invalid branch to/from OpenMP structured block" } + j < ({ continue; 10; }); // { dg-error "invalid branch to/from OpenMP structured block" } + j += ({ continue; 1; })) // { dg-error "invalid branch to/from OpenMP structured block" } continue; #pragma omp for - for (j = ({ break; 0; }); // { dg-error "invalid branch" } - j < ({ break; 10; }); // { dg-error "invalid branch" } - j += ({ break; 1; })) // { dg-error "invalid branch" } + for (j = ({ break; 0; 
}); // { dg-error "invalid branch to/from OpenMP structured block" } + j < ({ break; 10; }); // { dg-error "invalid branch to/from OpenMP structured block" } + j += ({ break; 1; })) // { dg-error "invalid branch to/from OpenMP structured block" } break; // { dg-error "break" } } } diff --git a/gcc/testsuite/gcc.dg/gomp/block-8.c b/gcc/testsuite/gcc.dg/gomp/block-8.c index 3c717d927be..f41007060e8 100644 --- a/gcc/testsuite/gcc.dg/gomp/block-8.c +++ b/gcc/testsuite/gcc.dg/gomp/block-8.c @@ -7,5 +7,5 @@ int foo() #pragma omp parallel for for (i = 0; i < 10; ++i) - return 0; // { dg-error "invalid branch" } + return 0; // { dg-error "invalid branch to/from OpenMP structured block" } } diff --git a/gcc/testsuite/gcc.dg/gomp/block-9.c b/gcc/testsuite/gcc.dg/gomp/block-9.c index 9217cb74990..2fae3deafd5 100644 --- a/gcc/testsuite/gcc.dg/gomp/block-9.c +++ b/gcc/testsuite/gcc.dg/gomp/block-9.c @@ -3,7 +3,7 @@ void foo(int i) { int j; - switch (i) // { dg-error "invalid entry" } + switch (i) // { dg-error "invalid entry to OpenMP structured block" } { #pragma omp parallel { case 0:; } diff --git a/gcc/testsuite/gcc.dg/gomp/target-1.c b/gcc/testsuite/gcc.dg/gomp/target-1.c index 09e65bd3115..aaa6a145779 100644 --- a/gcc/testsuite/gcc.dg/gomp/target-1.c +++ b/gcc/testsuite/gcc.dg/gomp/target-1.c @@ -5,9 +5,9 @@ foo (int x) { bad1: #pragma omp target - goto bad1; /* { dg-error "invalid branch" } */ + goto bad1; // { dg-error "invalid branch to/from OpenMP structured block" } - goto bad2; /* { dg-error "invalid entry" } */ + goto bad2; // { dg-error "invalid entry to OpenMP structured block" } #pragma omp target { bad2: ; @@ -21,7 +21,7 @@ foo (int x) { ok1: break; } } - switch (x) /* { dg-error "invalid entry" } */ + switch (x) // { dg-error "invalid entry to OpenMP structured block" } { #pragma omp target { case 0:; } diff --git a/gcc/testsuite/gcc.dg/gomp/target-2.c b/gcc/testsuite/gcc.dg/gomp/target-2.c index 546a1d0c157..3a7afc4892b 100644 --- 
a/gcc/testsuite/gcc.dg/gomp/target-2.c +++ b/gcc/testsuite/gcc.dg/gomp/target-2.c @@ -5,9 +5,9 @@ foo (int x, int y) { bad1: #pragma omp target data map(tofrom: y) - goto bad1; /* { dg-error "invalid branch" } */ + goto bad1; // { dg-error "invalid branch to/from OpenMP structured block" } - goto bad2; /* { dg-error "invalid entry" } */ + goto bad2; // { dg-error "invalid entry to OpenMP structured block" } #pragma omp target data map(tofrom: y) { bad2: ; @@ -21,7 +21,7 @@ foo (int x, int y) { ok1: break; } } - switch (x) /* { dg-error "invalid entry" } */ + switch (x) // { dg-error "invalid entry to OpenMP structured block" } { #pragma omp target data map(tofrom: y) { case 0:; } diff --git a/gcc/testsuite/gcc.dg/gomp/taskgroup-1.c b/gcc/testsuite/gcc.dg/gomp/taskgroup-1.c index e301efc19c4..1997e0c158d 100644 --- a/gcc/testsuite/gcc.dg/gomp/taskgroup-1.c +++ b/gcc/testsuite/gcc.dg/gomp/taskgroup-1.c @@ -5,9 +5,9 @@ foo (int x) { bad1: #pragma omp taskgroup - goto bad1; /* { dg-error "invalid branch" } */ + goto bad1; // { dg-error "invalid branch to/from OpenMP structured block" } - goto bad2; /* { dg-error "invalid entry" } */ + goto bad2; // { dg-error "invalid entry to OpenMP structured block" } #pragma omp taskgroup { bad2: ; @@ -21,7 +21,7 @@ foo (int x) { ok1: break; } } - switch (x) /* { dg-error "invalid entry" } */ + switch (x) // { dg-error "invalid entry to OpenMP structured block" } { #pragma omp taskgroup { case 0:; } diff --git a/gcc/testsuite/gcc.dg/gomp/teams-1.c b/gcc/testsuite/gcc.dg/gomp/teams-1.c index 73c00ded78b..ad5b100efc9 100644 --- a/gcc/testsuite/gcc.dg/gomp/teams-1.c +++ b/gcc/testsuite/gcc.dg/gomp/teams-1.c @@ -5,9 +5,9 @@ foo (int x) { bad1: #pragma omp target teams - goto bad1; /* { dg-error "invalid branch" } */ + goto bad1; // { dg-error "invalid branch to/from OpenMP structured block" } - goto bad2; /* { dg-error "invalid entry" } */ + goto bad2; // { dg-error "invalid entry to OpenMP structured block" } #pragma omp target teams { 
bad2: ; @@ -21,7 +21,7 @@ foo (int x) { ok1: break; } } - switch (x) /* { dg-error "invalid entry" } */ + switch (x) // { dg-error "invalid entry to OpenMP structured block" } { #pragma omp target teams { case 0:; } @@ -34,9 +34,9 @@ bar (int x) bad1: #pragma omp target #pragma omp teams - goto bad1; /* { dg-error "invalid branch" } */ + goto bad1; // { dg-error "invalid branch to/from OpenMP structured block" } - goto bad2; /* { dg-error "invalid entry" } */ + goto bad2; // { dg-error "invalid entry to OpenMP structured block" } #pragma omp target #pragma omp teams { @@ -52,7 +52,7 @@ bar (int x) { ok1: break; } } - switch (x) /* { dg-error "invalid entry" } */ + switch (x) // { dg-error "invalid entry to OpenMP structured block" } { #pragma omp target #pragma omp teams diff --git a/gcc/testsuite/gfortran.dg/goacc/acc_on_device-1.f95 b/gcc/testsuite/gfortran.dg/goacc/acc_on_device-1.f95 new file mode 100644 index 00000000000..9dfde26f76b --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/acc_on_device-1.f95 @@ -0,0 +1,22 @@ +! Have to enable optimizations, as otherwise builtins won't be expanded. +! { dg-additional-options "-O -fdump-rtl-expand" } + +logical function f () + implicit none + + external acc_on_device + logical (4) acc_on_device + + f = .false. + f = f .or. acc_on_device () + f = f .or. acc_on_device (1, 2) + f = f .or. acc_on_device (3.14) + f = f .or. acc_on_device ("hello") + + return +end function f + +! Unsuitable to be handled as a builtin, so we're expecting four calls. +! { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]*\\\"acc_on_device" 4 "expand" } } + +! { dg-final { cleanup-rtl-dump "expand" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/acc_on_device-2-off.f95 b/gcc/testsuite/gfortran.dg/goacc/acc_on_device-2-off.f95 new file mode 100644 index 00000000000..cf2826475a3 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/acc_on_device-2-off.f95 @@ -0,0 +1,39 @@ +! Have to enable optimizations, as otherwise builtins won't be expanded. 
+! { dg-additional-options "-O -fdump-rtl-expand -fno-openacc" } + +module openacc_kinds + implicit none + + integer, parameter :: acc_device_kind = 4 + +end module openacc_kinds + +module openacc + use openacc_kinds + implicit none + + integer (acc_device_kind), parameter :: acc_device_host = 2 + + interface + function acc_on_device (dev) + use openacc_kinds + logical (4) :: acc_on_device + integer (acc_device_kind), intent (in) :: dev + end function acc_on_device + end interface +end module openacc + +logical (4) function f () + use openacc + implicit none + + integer (4), parameter :: dev = 2 + + f = acc_on_device (dev) + return +end function f + +! Without -fopenacc, we're expecting one call. +! { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]*\\\"acc_on_device" 1 "expand" } } + +! { dg-final { cleanup-rtl-dump "expand" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/acc_on_device-2.f95 b/gcc/testsuite/gfortran.dg/goacc/acc_on_device-2.f95 new file mode 100644 index 00000000000..7730a60a7dc --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/acc_on_device-2.f95 @@ -0,0 +1,40 @@ +! Have to enable optimizations, as otherwise builtins won't be expanded. +! { dg-additional-options "-O -fdump-rtl-expand" } + +module openacc_kinds + implicit none + + integer, parameter :: acc_device_kind = 4 + +end module openacc_kinds + +module openacc + use openacc_kinds + implicit none + + integer (acc_device_kind), parameter :: acc_device_host = 2 + + interface + function acc_on_device (dev) + use openacc_kinds + logical (4) :: acc_on_device + integer (acc_device_kind), intent (in) :: dev + end function acc_on_device + end interface +end module openacc + +logical (4) function f () + use openacc + implicit none + + integer (4), parameter :: dev = 2 + + f = acc_on_device (dev) + return +end function f + +! With -fopenacc, we're expecting the builtin to be expanded, so no calls. +! TODO: not working. +! 
{ dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]*\\\"acc_on_device" 0 "expand" { xfail *-*-* } } } + +! { dg-final { cleanup-rtl-dump "expand" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/assumed.f95 b/gcc/testsuite/gfortran.dg/goacc/assumed.f95 new file mode 100644 index 00000000000..328724107eb --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/assumed.f95 @@ -0,0 +1,47 @@ +! { dg-do compile } +! { dg-additional-options "-fmax-errors=100" } + +module test +contains + subroutine assumed_size(a) + implicit none + integer :: a(*), i + !$acc declare device_resident (a) ! { dg-error "Assumed size" } + !$acc data copy (a) ! { dg-error "Assumed size" } + !$acc end data + !$acc data deviceptr (a) ! { dg-error "Assumed size" } + !$acc end data + !$acc parallel private (a) ! { dg-error "Assumed size" } + !$acc end parallel + !$acc host_data use_device (a) ! { dg-error "Assumed size" } + !$acc end host_data + !$acc parallel loop reduction(+:a) ! { dg-error "Assumed size" } + do i = 1,5 + enddo + !$acc end parallel loop + !$acc update device (a) ! { dg-error "Assumed size" } + !$acc update host (a) ! { dg-error "Assumed size" } + !$acc update self (a) ! { dg-error "Assumed size" } + end subroutine assumed_size + subroutine assumed_rank(a) + implicit none + integer, intent(in) :: a(..) + integer :: i + !$acc declare device_resident (a) ! { dg-error "Assumed rank" } + !$acc data copy (a) ! { dg-error "Assumed rank" } + !$acc end data + !$acc data deviceptr (a) ! { dg-error "Assumed rank" } + !$acc end data + !$acc parallel private (a) ! { dg-error "Assumed rank" } + !$acc end parallel + !$acc host_data use_device (a) ! { dg-error "Assumed rank" } + !$acc end host_data + !$acc parallel loop reduction(+:a) ! { dg-error "Assumed rank" } + do i = 1,5 + enddo + !$acc end parallel loop + !$acc update device (a) ! { dg-error "Assumed rank" } + !$acc update host (a) ! { dg-error "Assumed rank" } + !$acc update self (a) ! 
{ dg-error "Assumed rank" } + end subroutine assumed_rank +end module test diff --git a/gcc/testsuite/gfortran.dg/goacc/asyncwait-1.f95 b/gcc/testsuite/gfortran.dg/goacc/asyncwait-1.f95 new file mode 100644 index 00000000000..d630d388ef7 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/asyncwait-1.f95 @@ -0,0 +1,91 @@ +! { dg-do compile } + +program asyncwait + integer, parameter :: N = 64 + real, allocatable :: a(:), b(:) + integer i + + allocate (a(N)) + allocate (b(N)) + + a(:) = 3.0 + b(:) = 0.0 + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) async (1 2) ! { dg-error "Unclassifiable OpenACC directive" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) async (1,) ! { dg-error "Unclassifiable OpenACC directive" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) async (,1) ! { dg-error "Invalid character in name" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) async (1,2,) ! { dg-error "Unclassifiable OpenACC directive" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) async (1,2 3) ! { dg-error "Unclassifiable OpenACC directive" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) async (1,2,,) ! { dg-error "Unclassifiable OpenACC directive" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) async (1 ! 
{ dg-error "Unclassifiable OpenACC directive" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) async (*) ! { dg-error "Invalid character in name at" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) async (a) ! { dg-error "ASYNC clause at \\\(1\\\) requires a scalar INTEGER expression" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) async (N) + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) async (1.0) ! { dg-error "ASYNC clause at \\\(1\\\) requires a scalar INTEGER expression" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) async () ! { dg-error "Invalid character in name at " } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) async + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel +end program asyncwait diff --git a/gcc/testsuite/gfortran.dg/goacc/asyncwait-2.f95 b/gcc/testsuite/gfortran.dg/goacc/asyncwait-2.f95 new file mode 100644 index 00000000000..db0ce1f912a --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/asyncwait-2.f95 @@ -0,0 +1,91 @@ +! { dg-do compile } + +program asyncwait + integer, parameter :: N = 64 + real, allocatable :: a(:), b(:) + integer i + + allocate (a(N)) + allocate (b(N)) + + a(:) = 3.0 + b(:) = 0.0 + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) wait (1 2) ! { dg-error "Syntax error in OpenACC expression list" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) wait (1,) ! 
{ dg-error "Syntax error in OpenACC expression list" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) wait (,1) ! { dg-error "Syntax error in OpenACC expression list" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) wait (1,2,) ! { dg-error "Syntax error in OpenACC expression list" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) wait (1,2 3) ! { dg-error "Syntax error in OpenACC expression list" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) wait (1,2,,) ! { dg-error "Syntax error in OpenACC expression list" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) wait (1 ! { dg-error "Syntax error in OpenACC expression list" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) wait (*) ! { dg-error "Syntax error in OpenACC expression list" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) wait (a) ! { dg-error "WAIT clause at \\\(1\\\) requires a scalar INTEGER expression" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) wait (N) + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) wait (1.0) ! 
{ dg-error "WAIT clause at \\\(1\\\) requires a scalar INTEGER expression" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) wait () ! { dg-error "Syntax error in OpenACC expression list" } + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel ! { dg-error "Unexpected \\\!\\\$ACC END PARALLEL" } + + !$acc parallel copyin (a(1:N)) copy (b(1:N)) wait + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel +end program asyncwait diff --git a/gcc/testsuite/gfortran.dg/goacc/asyncwait-3.f95 b/gcc/testsuite/gfortran.dg/goacc/asyncwait-3.f95 new file mode 100644 index 00000000000..32c11def6f7 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/asyncwait-3.f95 @@ -0,0 +1,41 @@ +! { dg-do compile } + +program asyncwait + integer, parameter :: N = 64 + real, allocatable :: a(:), b(:) + integer i + + allocate (a(N)) + allocate (b(N)) + + a(:) = 3.0 + b(:) = 0.0 + + !$acc wait (1 2) ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait (1,) ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait (,1) ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait (1, 2, ) ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait (1, 2, ,) ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait (1 ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait (1, *) ! { dg-error "Invalid argument to \\\$\\\!ACC WAIT" } + + !$acc wait (1, a) ! { dg-error "WAIT clause at \\\(1\\\) requires a scalar INTEGER expression" } + + !$acc wait (a) ! { dg-error "WAIT clause at \\\(1\\\) requires a scalar INTEGER expression" } + + !$acc wait (N) + + !$acc wait (1.0) ! { dg-error "WAIT clause at \\\(1\\\) requires a scalar INTEGER expression" } + + !$acc wait 1 ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait N ! 
{ dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait (1) +end program asyncwait diff --git a/gcc/testsuite/gfortran.dg/goacc/asyncwait-4.f95 b/gcc/testsuite/gfortran.dg/goacc/asyncwait-4.f95 new file mode 100644 index 00000000000..cd64ef3d387 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/asyncwait-4.f95 @@ -0,0 +1,37 @@ +! { dg-do compile } + +program asyncwait + integer, parameter :: N = 64 + real, allocatable :: a(:), b(:) + integer i + + allocate (a(N)) + allocate (b(N)) + + a(:) = 3.0 + b(:) = 0.0 + + !$acc wait async (1 2) ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait async (1,) ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait async (,1) ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait async (1, 2, ) ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait async (1, 2, ,) ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait async (1 ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait async (1, *) ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait async (1, a) ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } + + !$acc wait async (a) ! { dg-error "ASYNC clause at \\\(1\\\) requires a scalar INTEGER expression" } + + !$acc wait async (N) + + !$acc wait async (1.0) ! { dg-error "ASYNC clause at \\\(1\\\) requires a scalar INTEGER expression" } + + !$acc wait async 1 ! { dg-error "Unexpected junk in \\\!\\\$ACC WAIT at" } +end program asyncwait diff --git a/gcc/testsuite/gfortran.dg/goacc/branch.f95 b/gcc/testsuite/gfortran.dg/goacc/branch.f95 new file mode 100644 index 00000000000..7eed3e209db --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/branch.f95 @@ -0,0 +1,53 @@ +! { dg-do compile } + +program test + implicit none + + integer :: i + + if (.true.) then + !$acc parallel + end if ! { dg-error "Unexpected" } + !$acc end parallel + end if + + if (.true.) then + !$acc kernels + end if ! 
{ dg-error "Unexpected" } + !$acc end kernels + end if + + !$acc parallel + if (.true.) then + !$acc end parallel ! { dg-error "Unexpected" } + end if + !$acc end parallel + + !$acc kernels + if (.true.) then + !$acc end kernels ! { dg-error "Unexpected" } + end if + !$acc end kernels + + !$acc parallel + if (.true.) then + end if + !$acc end parallel + + !$acc kernels + if (.true.) then + end if + !$acc end kernels + + if (.true.) then + !$acc parallel + !$acc end parallel + end if + + if (.true.) then + !$acc kernels + !$acc end kernels + end if + + +end program test \ No newline at end of file diff --git a/gcc/testsuite/gfortran.dg/goacc/cache-1.f95 b/gcc/testsuite/gfortran.dg/goacc/cache-1.f95 new file mode 100644 index 00000000000..746cf02f688 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/cache-1.f95 @@ -0,0 +1,12 @@ +! { dg-do compile } +! { dg-additional-options "-std=f2008" } + +program test + implicit none + integer :: i, d(10) + + do concurrent (i=1:5) + !$acc cache (d) + enddo +end +! { dg-prune-output "unimplemented" } diff --git a/gcc/testsuite/gfortran.dg/goacc/coarray.f95 b/gcc/testsuite/gfortran.dg/goacc/coarray.f95 new file mode 100644 index 00000000000..4f1224edc2a --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/coarray.f95 @@ -0,0 +1,35 @@ +! { dg-do compile } +! { dg-additional-options "-fcoarray=single" } + +! 
TODO: These cases must fail + +module test +contains + subroutine oacc1(a) + implicit none + integer :: i + integer, codimension[*] :: a + !$acc declare device_resident (a) + !$acc data copy (a) + !$acc end data + !$acc data deviceptr (a) + !$acc end data + !$acc parallel private (a) + !$acc end parallel + !$acc host_data use_device (a) + !$acc end host_data + !$acc parallel loop reduction(+:a) + do i = 1,5 + enddo + !$acc end parallel loop + !$acc parallel loop + do i = 1,5 + !$acc cache (a) + enddo + !$acc end parallel loop + !$acc update device (a) + !$acc update host (a) + !$acc update self (a) + end subroutine oacc1 +end module test +! { dg-prune-output "ACC cache unimplemented" } diff --git a/gcc/testsuite/gfortran.dg/goacc/continuation-free-form.f95 b/gcc/testsuite/gfortran.dg/goacc/continuation-free-form.f95 new file mode 100644 index 00000000000..1c9a3f33526 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/continuation-free-form.f95 @@ -0,0 +1,23 @@ +! { dg-do compile } + +program test + implicit none + + integer :: i + real :: x + + !$acc parallel & + !$acc loop & ! continuation + !$acc & reduction(+:x) + + ! this line must be ignored + !$acc ! kernels + do i = 1,10 + x = x + 0.3 + enddo + ! continuation must begin with sentinel + !$acc end parallel & ! { dg-error "Unclassifiable OpenACC directive" } + ! loop + + print *, x +end \ No newline at end of file diff --git a/gcc/testsuite/gfortran.dg/goacc/cray.f95 b/gcc/testsuite/gfortran.dg/goacc/cray.f95 new file mode 100644 index 00000000000..8f2c077beee --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/cray.f95 @@ -0,0 +1,56 @@ +! { dg-do compile } +! { dg-additional-options "-fcray-pointer" } + +module test +contains + subroutine oacc1 + implicit none + integer :: i + real :: pointee + pointer (ptr, pointee) + !$acc declare device_resident (pointee) + !$acc declare device_resident (ptr) + !$acc data copy (pointee) ! { dg-error "Cray pointee" } + !$acc end data + !$acc data deviceptr (pointee) ! 
{ dg-error "Cray pointee" } + !$acc end data + !$acc parallel private (pointee) ! { dg-error "Cray pointee" } + !$acc end parallel + !$acc host_data use_device (pointee) ! { dg-error "Cray pointee" } + !$acc end host_data + !$acc parallel loop reduction(+:pointee) ! { dg-error "Cray pointee" } + do i = 1,5 + enddo + !$acc end parallel loop + !$acc parallel loop + do i = 1,5 + ! Subarrays are not implemented yet + !$acc cache (pointee) ! TODO: This must fail, as in openacc-1_0-branch + enddo + !$acc end parallel loop + !$acc update device (pointee) ! { dg-error "Cray pointee" } + !$acc update host (pointee) ! { dg-error "Cray pointee" } + !$acc update self (pointee) ! { dg-error "Cray pointee" } + !$acc data copy (ptr) + !$acc end data + !$acc data deviceptr (ptr) ! { dg-error "Cray pointer" } + !$acc end data + !$acc parallel private (ptr) + !$acc end parallel + !$acc host_data use_device (ptr) ! { dg-error "Cray pointer" } + !$acc end host_data + !$acc parallel loop reduction(+:ptr) ! { dg-error "Cray pointer" } + do i = 1,5 + enddo + !$acc end parallel loop + !$acc parallel loop + do i = 1,5 + !$acc cache (ptr) ! TODO: This must fail, as in openacc-1_0-branch + enddo + !$acc end parallel loop + !$acc update device (ptr) + !$acc update host (ptr) + !$acc update self (ptr) + end subroutine oacc1 +end module test +! { dg-prune-output "unimplemented" } diff --git a/gcc/testsuite/gfortran.dg/goacc/critical.f95 b/gcc/testsuite/gfortran.dg/goacc/critical.f95 new file mode 100644 index 00000000000..510ea185052 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/critical.f95 @@ -0,0 +1,27 @@ +! { dg-do compile } +! { dg-additional-options "-fcoarray=single" } + +module test +contains + subroutine oacc1 + implicit none + integer :: i, j + j = 0 + !$acc parallel + critical ! 
{ dg-error "CRITICAL block inside of" } + j = j + 1 + end critical + !$acc end parallel + end subroutine oacc1 + + subroutine oacc2 + implicit none + integer :: i, j + j = 0 + critical + !$acc parallel ! { dg-error "OpenACC directive inside of" } + j = j + 1 + !$acc end parallel + end critical + end subroutine oacc2 +end module test \ No newline at end of file diff --git a/gcc/testsuite/gfortran.dg/goacc/data-clauses.f95 b/gcc/testsuite/gfortran.dg/goacc/data-clauses.f95 new file mode 100644 index 00000000000..b94214e8b63 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/data-clauses.f95 @@ -0,0 +1,259 @@ +! { dg-do compile } +! { dg-additional-options "-fmax-errors=100" } + +module test + implicit none +contains + + subroutine foo (vi, asa) + integer, value :: vi + integer :: i, ia(10) + complex :: c, ca(10) + real, target:: r + real :: ra(10) + real, pointer :: rp + real, dimension(:), allocatable :: aa + real, dimension(:) :: asa + type t + integer :: i + end type + type(t) :: ti + type(t), allocatable :: tia + type(t), target :: tit + type(t), pointer :: tip + rp => r + tip => tit + + !$acc parallel deviceptr (rp) ! { dg-error "POINTER" } + !$acc end parallel + !$acc parallel deviceptr (vi) ! { dg-error "VALUE" } + !$acc end parallel + !$acc parallel deviceptr (aa) ! { dg-error "ALLOCATABLE" } + !$acc end parallel + + !$acc parallel deviceptr (i, c, r, ia, ca, ra, asa, ti) + !$acc end parallel + !$acc kernels deviceptr (i, c, r, ia, ca, ra, asa, ti) + !$acc end kernels + !$acc data deviceptr (i, c, r, ia, ca, ra, asa, ti) + !$acc end data + + + !$acc parallel copy (tip) ! { dg-error "POINTER" } + !$acc end parallel + !$acc parallel copy (tia) ! { dg-error "ALLOCATABLE" } + !$acc end parallel + !$acc parallel deviceptr (i) copy (i) ! 
{ dg-error "multiple clauses" } + !$acc end parallel + + !$acc parallel copy (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end parallel + !$acc kernels copy (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end kernels + !$acc data copy (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end data + + + !$acc parallel copyin (tip) ! { dg-error "POINTER" } + !$acc end parallel + !$acc parallel copyin (tia) ! { dg-error "ALLOCATABLE" } + !$acc end parallel + !$acc parallel deviceptr (i) copyin (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copy (i) copyin (i) ! { dg-error "multiple clauses" } + !$acc end parallel + + !$acc parallel copyin (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end parallel + !$acc kernels copyin (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end kernels + !$acc data copyin (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end data + + + !$acc parallel copyout (tip) ! { dg-error "POINTER" } + !$acc end parallel + !$acc parallel copyout (tia) ! { dg-error "ALLOCATABLE" } + !$acc end parallel + !$acc parallel deviceptr (i) copyout (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copy (i) copyout (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copyin (i) copyout (i) ! { dg-error "multiple clauses" } + !$acc end parallel + + !$acc parallel copyout (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end parallel + !$acc kernels copyout (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end kernels + !$acc data copyout (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end data + + + !$acc parallel create (tip) ! { dg-error "POINTER" } + !$acc end parallel + !$acc parallel create (tia) ! { dg-error "ALLOCATABLE" } + !$acc end parallel + !$acc parallel deviceptr (i) create (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copy (i) create (i) ! 
{ dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copyin (i) create (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copyout (i) create (i) ! { dg-error "multiple clauses" } + !$acc end parallel + + !$acc parallel create (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end parallel + !$acc kernels create (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end kernels + !$acc data create (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end data + + + !$acc parallel present (tip) ! { dg-error "POINTER" } + !$acc end parallel + !$acc parallel present (tia) ! { dg-error "ALLOCATABLE" } + !$acc end parallel + !$acc parallel deviceptr (i) present (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copy (i) present (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copyin (i) present (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copyout (i) present (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel create (i) present (i) ! { dg-error "multiple clauses" } + !$acc end parallel + + !$acc parallel present (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end parallel + !$acc kernels present (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end kernels + !$acc data present (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end data + + + !$acc parallel pcopy (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end parallel + !$acc parallel pcopyin (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end parallel + !$acc parallel pcopyout (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end parallel + !$acc parallel pcreate (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end parallel + + + !$acc parallel present_or_copy (tip) ! { dg-error "POINTER" } + !$acc end parallel + !$acc parallel present_or_copy (tia) ! { dg-error "ALLOCATABLE" } + !$acc end parallel + !$acc parallel deviceptr (i) present_or_copy (i) ! 
{ dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copy (i) present_or_copy (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copyin (i) present_or_copy (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copyout (i) present_or_copy (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel create (i) present_or_copy (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel present (i) present_or_copy (i) ! { dg-error "multiple clauses" } + !$acc end parallel + + !$acc parallel present_or_copy (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end parallel + !$acc kernels present_or_copy (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end kernels + !$acc data present_or_copy (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end data + + + !$acc parallel present_or_copyin (tip) ! { dg-error "POINTER" } + !$acc end parallel + !$acc parallel present_or_copyin (tia) ! { dg-error "ALLOCATABLE" } + !$acc end parallel + !$acc parallel deviceptr (i) present_or_copyin (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copy (i) present_or_copyin (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copyin (i) present_or_copyin (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copyout (i) present_or_copyin (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel create (i) present_or_copyin (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel present (i) present_or_copyin (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel present_or_copy (i) present_or_copyin (i) ! 
{ dg-error "multiple clauses" } + !$acc end parallel + + !$acc parallel present_or_copyin (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end parallel + !$acc kernels present_or_copyin (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end kernels + !$acc data present_or_copyin (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end data + + + !$acc parallel present_or_copyout (tip) ! { dg-error "POINTER" } + !$acc end parallel + !$acc parallel present_or_copyout (tia) ! { dg-error "ALLOCATABLE" } + !$acc end parallel + !$acc parallel deviceptr (i) present_or_copyout (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copy (i) present_or_copyout (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copyin (i) present_or_copyout (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copyout (i) present_or_copyout (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel create (i) present_or_copyout (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel present (i) present_or_copyout (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel present_or_copy (i) present_or_copyout (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel present_or_copyin (i) present_or_copyout (i) ! { dg-error "multiple clauses" } + !$acc end parallel + + !$acc parallel present_or_copyout (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end parallel + !$acc kernels present_or_copyout (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end kernels + !$acc data present_or_copyout (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end data + + + !$acc parallel present_or_create (tip) ! { dg-error "POINTER" } + !$acc end parallel + !$acc parallel present_or_create (tia) ! { dg-error "ALLOCATABLE" } + !$acc end parallel + !$acc parallel deviceptr (i) present_or_create (i) ! 
{ dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copy (i) present_or_create (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copyin (i) present_or_create (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel copyout (i) present_or_create (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel create (i) present_or_create (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel present (i) present_or_create (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel present_or_copy (i) present_or_create (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel present_or_copyin (i) present_or_create (i) ! { dg-error "multiple clauses" } + !$acc end parallel + !$acc parallel present_or_copyout (i) present_or_create (i) ! { dg-error "multiple clauses" } + !$acc end parallel + + !$acc parallel present_or_create (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end parallel + !$acc kernels present_or_create (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end kernels + !$acc data present_or_create (i, c, r, ia, ca, ra, asa, rp, ti, vi, aa) + !$acc end data + + end subroutine foo +end module test \ No newline at end of file diff --git a/gcc/testsuite/gfortran.dg/goacc/data-tree.f95 b/gcc/testsuite/gfortran.dg/goacc/data-tree.f95 new file mode 100644 index 00000000000..32c50fd6b0b --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/data-tree.f95 @@ -0,0 +1,30 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original" } + +program test + implicit none + integer :: q, i, j, k, m, n, o, p, r, s, t, u, v, w + logical :: l + + !$acc data if(l) copy(i), copyin(j), copyout(k), create(m) & + !$acc present(o), pcopy(p), pcopyin(r), pcopyout(s), pcreate(t) & + !$acc deviceptr(u) + !$acc end data + +end program test +! { dg-final { scan-tree-dump-times "pragma acc data" 1 "original" } } + +! 
{ dg-final { scan-tree-dump-times "if" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(force_tofrom:i\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(force_to:j\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(force_from:k\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(force_alloc:m\\)" 1 "original" } } + +! { dg-final { scan-tree-dump-times "map\\(force_present:o\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(tofrom:p\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(to:r\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(from:s\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(alloc:t\\)" 1 "original" } } + +! { dg-final { scan-tree-dump-times "map\\(force_deviceptr:u\\)" 1 "original" } } +! { dg-final { cleanup-tree-dump "original" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/declare-1.f95 b/gcc/testsuite/gfortran.dg/goacc/declare-1.f95 new file mode 100644 index 00000000000..03540f13bcf --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/declare-1.f95 @@ -0,0 +1,20 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original" } + +program test + implicit none + integer :: i + + !$acc declare copy(i) +contains + real function foo(n) + integer, value :: n + BLOCK + integer i + !$acc declare copy(i) + END BLOCK + end function foo +end program test +! { dg-prune-output "unimplemented" } +! { dg-final { scan-tree-dump-times "pragma acc declare map\\(force_tofrom:i\\)" 2 "original" } } +! { dg-final { cleanup-tree-dump "original" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/enter-exit-data.f95 b/gcc/testsuite/gfortran.dg/goacc/enter-exit-data.f95 new file mode 100644 index 00000000000..8f1715e659d --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/enter-exit-data.f95 @@ -0,0 +1,88 @@ +! { dg-do compile } +! 
{ dg-additional-options "-fmax-errors=100" } + +module test + implicit none +contains + + subroutine foo (vi) + logical :: l + integer, value :: vi + integer :: i, ia(10), a(10), b(2:8) + complex :: c, ca(10) + real, target:: r + real :: ra(10) + real, pointer :: rp + real, dimension(:), allocatable :: aa + type t + integer :: i + end type + type(t) :: ti + type(t), allocatable :: tia + type(t), target :: tit + type(t), pointer :: tip + rp => r + tip => tit + + ! enter data + !$acc enter data + !$acc enter data if (.false.) + !$acc enter data if (l) + !$acc enter data if (.false.) if (l) ! { dg-error "Unclassifiable" } + !$acc enter data if (i) ! { dg-error "LOGICAL" } + !$acc enter data if (1) ! { dg-error "LOGICAL" } + !$acc enter data if (a) ! { dg-error "LOGICAL" } + !$acc enter data if (b(5:6)) ! { dg-error "LOGICAL" } + !$acc enter data async (l) ! { dg-error "INTEGER" } + !$acc enter data async (.true.) ! { dg-error "INTEGER" } + !$acc enter data async (1) + !$acc enter data async (i) + !$acc enter data async (a) ! { dg-error "INTEGER" } + !$acc enter data async (b(5:6)) ! { dg-error "INTEGER" } + !$acc enter data wait (l) ! { dg-error "INTEGER" } + !$acc enter data wait (.true.) ! { dg-error "INTEGER" } + !$acc enter data wait (i, 1) + !$acc enter data wait (a) ! { dg-error "INTEGER" } + !$acc enter data wait (b(5:6)) ! { dg-error "INTEGER" } + !$acc enter data copyin (tip) ! { dg-error "POINTER" } + !$acc enter data copyin (tia) ! { dg-error "ALLOCATABLE" } + !$acc enter data create (tip) ! { dg-error "POINTER" } + !$acc enter data create (tia) ! { dg-error "ALLOCATABLE" } + !$acc enter data present_or_copyin (tip) ! { dg-error "POINTER" } + !$acc enter data present_or_copyin (tia) ! { dg-error "ALLOCATABLE" } + !$acc enter data present_or_create (tip) ! { dg-error "POINTER" } + !$acc enter data present_or_create (tia) ! { dg-error "ALLOCATABLE" } + !$acc enter data copyin (i) create (i) ! 
{ dg-error "multiple clauses" } + !$acc enter data copyin (i) present_or_copyin (i) ! { dg-error "multiple clauses" } + !$acc enter data create (i) present_or_copyin (i) ! { dg-error "multiple clauses" } + !$acc enter data copyin (i) present_or_create (i) ! { dg-error "multiple clauses" } + !$acc enter data create (i) present_or_create (i) ! { dg-error "multiple clauses" } + !$acc enter data present_or_copyin (i) present_or_create (i) ! { dg-error "multiple clauses" } + + ! exit data + !$acc exit data + !$acc exit data if (.false.) + !$acc exit data if (l) + !$acc exit data if (.false.) if (l) ! { dg-error "Unclassifiable" } + !$acc exit data if (i) ! { dg-error "LOGICAL" } + !$acc exit data if (1) ! { dg-error "LOGICAL" } + !$acc exit data if (a) ! { dg-error "LOGICAL" } + !$acc exit data if (b(5:6)) ! { dg-error "LOGICAL" } + !$acc exit data async (l) ! { dg-error "INTEGER" } + !$acc exit data async (.true.) ! { dg-error "INTEGER" } + !$acc exit data async (1) + !$acc exit data async (i) + !$acc exit data async (a) ! { dg-error "INTEGER" } + !$acc exit data async (b(5:6)) ! { dg-error "INTEGER" } + !$acc exit data wait (l) ! { dg-error "INTEGER" } + !$acc exit data wait (.true.) ! { dg-error "INTEGER" } + !$acc exit data wait (i, 1) + !$acc exit data wait (a) ! { dg-error "INTEGER" } + !$acc exit data wait (b(5:6)) ! { dg-error "INTEGER" } + !$acc exit data copyout (tip) ! { dg-error "POINTER" } + !$acc exit data copyout (tia) ! { dg-error "ALLOCATABLE" } + !$acc exit data delete (tip) ! { dg-error "POINTER" } + !$acc exit data delete (tia) ! { dg-error "ALLOCATABLE" } + !$acc exit data copyout (i) delete (i) ! 
{ dg-error "multiple clauses" } + end subroutine foo +end module test diff --git a/gcc/testsuite/gfortran.dg/goacc/fixed-1.f b/gcc/testsuite/gfortran.dg/goacc/fixed-1.f new file mode 100644 index 00000000000..6a454190102 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/fixed-1.f @@ -0,0 +1,12 @@ + INTEGER :: ARGC + ARGC = COMMAND_ARGUMENT_COUNT () + +!$OMP PARALLEL +!$ACC PARALLEL COPYIN(ARGC) + IF (ARGC .NE. 0) THEN + CALL ABORT + END IF +!$ACC END PARALLEL +!$OMP END PARALLEL + + END diff --git a/gcc/testsuite/gfortran.dg/goacc/fixed-2.f b/gcc/testsuite/gfortran.dg/goacc/fixed-2.f new file mode 100644 index 00000000000..2c2b0a38e00 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/fixed-2.f @@ -0,0 +1,15 @@ +! { dg-do compile } +! { dg-additional-options "-fmax-errors=100" } + + INTEGER :: ARGC + ARGC = COMMAND_ARGUMENT_COUNT () + +!$OMP xPARALLEL +!$ACC xPARALLEL COPYIN(ARGC) ! { dg-error "Unclassifiable OpenACC directive" } + IF (ARGC .NE. 0) THEN + CALL ABORT + END IF +!$ACC END PARALLEL ! 
{ dg-error "Unexpected" } +!$OMP END PARALLEL + + END diff --git a/gcc/testsuite/gfortran.dg/goacc/fixed-3.f b/gcc/testsuite/gfortran.dg/goacc/fixed-3.f new file mode 100644 index 00000000000..ede361e8d2e --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/fixed-3.f @@ -0,0 +1,13 @@ + IMPLICIT NONE + + INTEGER DEV + +!$ACC PARALLEL + DEV = 0 +!$ACC END PARALLEL + +!$ACC PARALLEL + DEV = 0 +!$ACC END PARALLEL + + END diff --git a/gcc/testsuite/gfortran.dg/goacc/fixed-4.f b/gcc/testsuite/gfortran.dg/goacc/fixed-4.f new file mode 100644 index 00000000000..120d5a9aa1c --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/fixed-4.f @@ -0,0 +1,6 @@ + IMPLICIT NONE + +!$ACC PARALLEL +!$ACC END PARALLEL + + END diff --git a/gcc/testsuite/gfortran.dg/goacc/goacc.exp b/gcc/testsuite/gfortran.dg/goacc/goacc.exp new file mode 100644 index 00000000000..dcacb31906d --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/goacc.exp @@ -0,0 +1,36 @@ +# Copyright (C) 2005-2015 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# GCC testsuite that uses the `dg.exp' driver. + +# Load support procs. +load_lib gfortran-dg.exp + +if ![check_effective_target_fopenacc] { + return +} + +# Initialize `dg'. +dg-init + +# Main loop. +gfortran-dg-runtest [lsort \ + [find $srcdir/$subdir *.\[fF\]{,90,95,03,08} ] ] "" "-fopenacc" + +# All done. 
+dg-finish diff --git a/gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 b/gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 new file mode 100644 index 00000000000..19e741185ef --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 @@ -0,0 +1,13 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original" } + +program test + implicit none + integer :: i + + !$acc host_data use_device(i) + !$acc end host_data +end program test +! { dg-prune-output "unimplemented" } +! { dg-final { scan-tree-dump-times "pragma acc host_data use_device\\(i\\)" 1 "original" } } +! { dg-final { cleanup-tree-dump "original" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/if.f95 b/gcc/testsuite/gfortran.dg/goacc/if.f95 new file mode 100644 index 00000000000..a45035d8230 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/if.f95 @@ -0,0 +1,52 @@ +! { dg-do compile } + +program test + implicit none + + logical :: x + integer :: i + + !$acc parallel if ! { dg-error "Unclassifiable OpenACC directive" } + !$acc parallel if () ! { dg-error "Invalid character" } + !$acc parallel if (i) ! { dg-error "scalar LOGICAL expression" } + !$acc end parallel + !$acc parallel if (1) ! { dg-error "scalar LOGICAL expression" } + !$acc end parallel + !$acc kernels if (i) ! { dg-error "scalar LOGICAL expression" } + !$acc end kernels + !$acc kernels if ! { dg-error "Unclassifiable OpenACC directive" } + !$acc kernels if () ! { dg-error "Invalid character" } + !$acc kernels if (1) ! { dg-error "scalar LOGICAL expression" } + !$acc end kernels + !$acc data if ! { dg-error "Unclassifiable OpenACC directive" } + !$acc data if () ! { dg-error "Invalid character" } + !$acc data if (i) ! { dg-error "scalar LOGICAL expression" } + !$acc end data + !$acc data if (1) ! { dg-error "scalar LOGICAL expression" } + !$acc end data + + ! at most one if clause may appear (dg-error kept in a trailing comment so the duplicated clause itself is what gets rejected) + !$acc parallel if (.false.) if (.false.) ! { dg-error "Unclassifiable OpenACC directive" } + !$acc kernels if (.false.) 
if (.false.) ! { dg-error "Unclassifiable OpenACC directive" } + !$acc data if (.false.) if (.false.) ! { dg-error "Unclassifiable OpenACC directive" } + + !$acc parallel if (x) + !$acc end parallel + !$acc parallel if (.true.) + !$acc end parallel + !$acc parallel if (i.gt.1) + !$acc end parallel + !$acc kernels if (x) + !$acc end kernels + !$acc kernels if (.true.) + !$acc end kernels + !$acc kernels if (i.gt.1) + !$acc end kernels + !$acc data if (x) + !$acc end data + !$acc data if (.true.) + !$acc end data + !$acc data if (i.gt.1) + !$acc end data + +end program test \ No newline at end of file diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95 new file mode 100644 index 00000000000..7585a16de8f --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95 @@ -0,0 +1,32 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original" } + +program test + implicit none + integer :: q, i, j, k, m, n, o, p, r, s, t, u, v, w + logical :: l + + !$acc kernels if(l) async copy(i), copyin(j), copyout(k), create(m) & + !$acc present(o), pcopy(p), pcopyin(r), pcopyout(s), pcreate(t) & + !$acc deviceptr(u) + !$acc end kernels + +end program test +! { dg-final { scan-tree-dump-times "pragma acc kernels" 1 "original" } } + +! { dg-final { scan-tree-dump-times "if" 1 "original" } } +! { dg-final { scan-tree-dump-times "async" 1 "original" } } + +! { dg-final { scan-tree-dump-times "map\\(force_tofrom:i\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(force_to:j\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(force_from:k\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(force_alloc:m\\)" 1 "original" } } + +! { dg-final { scan-tree-dump-times "map\\(force_present:o\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(tofrom:p\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(to:r\\)" 1 "original" } } +! 
{ dg-final { scan-tree-dump-times "map\\(from:s\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(alloc:t\\)" 1 "original" } } + +! { dg-final { scan-tree-dump-times "map\\(force_deviceptr:u\\)" 1 "original" } } +! { dg-final { cleanup-tree-dump "original" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/list.f95 b/gcc/testsuite/gfortran.dg/goacc/list.f95 new file mode 100644 index 00000000000..94fdadd86db --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/list.f95 @@ -0,0 +1,111 @@ +! { dg-do compile } +! { dg-additional-options "-fmax-errors=100" } + +program test + implicit none + + integer :: i, j, k, l, a(10) + common /b/ j, k + real, pointer :: p1 => NULL() + complex :: c, d(10) + + !$acc parallel private(i) + !$acc end parallel + + !$acc parallel private(a) + !$acc end parallel + + !$acc parallel private(c, d) + !$acc end parallel + + !$acc parallel private(i, j, k, l, a) + !$acc end parallel + + !$acc parallel private (i) private (j) + !$acc end parallel + + !$acc parallel private ! { dg-error "Unclassifiable OpenACC directive" } + + !$acc parallel private() ! { dg-error "Syntax error" } + + !$acc parallel private(a(1:3)) ! { dg-error "Syntax error" } + + !$acc parallel private(10) ! { dg-error "Syntax error" } + + !$acc parallel private(/b/, /b/) ! { dg-error "present on multiple clauses" } + !$acc end parallel + + !$acc parallel private(i, j, i) ! { dg-error "present on multiple clauses" } + !$acc end parallel + + !$acc parallel private(p1) + !$acc end parallel + + !$acc parallel firstprivate(i) + !$acc end parallel + + !$acc parallel firstprivate(c, d) + !$acc end parallel + + !$acc parallel firstprivate(a) + !$acc end parallel + + !$acc parallel firstprivate(i, j, k, l, a) + !$acc end parallel + + !$acc parallel firstprivate (i) firstprivate (j) + !$acc end parallel + + !$acc parallel firstprivate ! { dg-error "Unclassifiable OpenACC directive" } + + !$acc parallel firstprivate() ! 
{ dg-error "Syntax error" } + + !$acc parallel firstprivate(a(1:3)) ! { dg-error "Syntax error" } + + !$acc parallel firstprivate(10) ! { dg-error "Syntax error" } + + !$acc parallel firstprivate (/b/, /b/) ! { dg-error "present on multiple clauses" } + !$acc end parallel + + !$acc parallel firstprivate (i, j, i) ! { dg-error "present on multiple clauses" } + !$acc end parallel + + !$acc parallel firstprivate(p1) + !$acc end parallel + + !$acc parallel private (i) firstprivate (i) ! { dg-error "present on multiple clauses" } + !$acc end parallel + + !$acc host_data use_device(i) + !$acc end host_data + + !$acc host_data use_device(c, d) + !$acc end host_data + + !$acc host_data use_device(a) + !$acc end host_data + + !$acc host_data use_device(i, j, k, l, a) + !$acc end host_data + + !$acc host_data use_device (i) use_device (j) + !$acc end host_data + + !$acc host_data use_device ! { dg-error "Unclassifiable OpenACC directive" } + + !$acc host_data use_device() ! { dg-error "Syntax error" } + + !$acc host_data use_device(a(1:3)) ! { dg-error "Syntax error" } + + !$acc host_data use_device(10) ! { dg-error "Syntax error" } + + !$acc host_data use_device(/b/, /b/) ! { dg-error "present on multiple clauses" } + !$acc end host_data + + !$acc host_data use_device(i, j, i) ! { dg-error "present on multiple clauses" } + !$acc end host_data + + !$acc host_data use_device(p1) ! { dg-error "POINTER" } + !$acc end host_data + +end program test \ No newline at end of file diff --git a/gcc/testsuite/gfortran.dg/goacc/literal.f95 b/gcc/testsuite/gfortran.dg/goacc/literal.f95 new file mode 100644 index 00000000000..e6760d04fa1 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/literal.f95 @@ -0,0 +1,30 @@ +! { dg-do compile } + +module test +contains + subroutine oacc1 + implicit none + integer :: i + !$acc declare device_resident (10) ! { dg-error "Syntax error" } + !$acc data copy (10) ! { dg-error "Syntax error" } + !$acc end data ! 
{ dg-error "Unexpected" } + !$acc data deviceptr (10) ! { dg-error "Syntax error" } + !$acc end data ! { dg-error "Unexpected" } + !$acc data private (10) ! { dg-error "Unclassifiable" } + !$acc end data ! { dg-error "Unexpected" } + !$acc host_data use_device (10) ! { dg-error "Syntax error" } + !$acc end host_data ! { dg-error "Unexpected" } + !$acc parallel loop reduction(+:10) ! { dg-error "Syntax error" } + do i = 1,5 + enddo + !$acc end parallel loop ! { dg-error "Unexpected" } + !$acc parallel loop + do i = 1,5 + !$acc cache (10) ! { dg-error "Syntax error" } + enddo + !$acc end parallel loop + !$acc update device (10) ! { dg-error "Syntax error" } + !$acc update host (10) ! { dg-error "Syntax error" } + !$acc update self (10) ! { dg-error "Syntax error" } + end subroutine oacc1 +end module test diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-1.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-1.f95 new file mode 100644 index 00000000000..e1b2dfd5d6e --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/loop-1.f95 @@ -0,0 +1,171 @@ +! { dg-do compile } +! { dg-additional-options "-fmax-errors=100" } +module test + implicit none +contains + +subroutine test1 + integer :: i, j, k, b(10) + integer, dimension (30) :: a + double precision :: d + real :: r + i = 0 + !$acc loop + do 100 ! { dg-error "cannot be a DO WHILE or DO without loop control" } + if (i .gt. 0) exit ! { dg-error "EXIT statement" } + 100 i = i + 1 + i = 0 + !$acc loop + do ! { dg-error "cannot be a DO WHILE or DO without loop control" } + if (i .gt. 0) exit ! { dg-error "EXIT statement" } + i = i + 1 + end do + i = 0 + !$acc loop + do 200 while (i .lt. 4) ! { dg-error "cannot be a DO WHILE or DO without loop control" } + 200 i = i + 1 + !$acc loop + do while (i .lt. 8) ! { dg-error "cannot be a DO WHILE or DO without loop control" } + i = i + 1 + end do + !$acc loop + do 300 d = 1, 30, 6 ! { dg-error "integer" } + i = d + 300 a(i) = 1 + !$acc loop + do d = 1, 30, 5 ! 
{ dg-error "integer" } + i = d + a(i) = 2 + end do + !$acc loop + do i = 1, 30 + if (i .eq. 16) exit ! { dg-error "EXIT statement" } + end do + !$acc loop + outer: do i = 1, 30 + do j = 5, 10 + if (i .eq. 6 .and. j .eq. 7) exit outer ! { dg-error "EXIT statement" } + end do + end do outer + last: do i = 1, 30 + end do last + + ! different types of loop are allowed + !$acc loop + do i = 1,10 + end do + !$acc loop + do 400, i = 1,10 +400 a(i) = i + + ! after loop directive must be loop + !$acc loop + a(1) = 1 ! { dg-error "Expected DO loop" } + do i = 1,10 + enddo + + ! combined directives may be used with/without end + !$acc parallel loop + do i = 1,10 + enddo + !$acc parallel loop + do i = 1,10 + enddo + !$acc end parallel loop + !$acc kernels loop + do i = 1,10 + enddo + !$acc kernels loop + do i = 1,10 + enddo + !$acc end kernels loop + + !$acc kernels loop reduction(max:i) + do i = 1,10 + enddo + !$acc kernels + !$acc loop reduction(max:i) + do i = 1,10 + enddo + !$acc end kernels + + !$acc parallel loop collapse(0) ! { dg-error "constant positive integer" } + do i = 1,10 + enddo + + !$acc parallel loop collapse(-1) ! { dg-error "constant positive integer" } + do i = 1,10 + enddo + + !$acc parallel loop collapse(i) ! { dg-error "Constant expression required" } + do i = 1,10 + enddo + + !$acc parallel loop collapse(4) ! { dg-error "not enough DO loops for collapsed" } + do i = 1, 3 + do j = 4, 6 + do k = 5, 7 + a(i+j-k) = i + j + k + end do + end do + end do + !$acc parallel loop collapse(2) + do i = 1, 5, 2 + do j = i + 1, 7, i ! 
{ dg-error "collapsed loops don.t form rectangular iteration space" } + end do + end do + !$acc parallel loop collapse(2) + do i = 1, 3 + do j = 4, 6 + end do + end do + !$acc parallel loop collapse(2) + do i = 1, 3 + do j = 4, 6 + end do + k = 4 + end do + !$acc parallel loop collapse(3-1) + do i = 1, 3 + do j = 4, 6 + end do + k = 4 + end do + !$acc parallel loop collapse(1+1) + do i = 1, 3 + do j = 4, 6 + end do + k = 4 + end do + !$acc parallel loop collapse(2) + do i = 1, 3 + do ! { dg-error "cannot be a DO WHILE or DO without loop control" } + end do + end do + !$acc parallel loop collapse(2) + do i = 1, 3 + do r = 4, 6 ! { dg-error "integer" } + end do + end do + + ! Both seq and independent are not allowed + !$acc loop independent seq ! { dg-error "SEQ conflicts with INDEPENDENT" } + do i = 1,10 + enddo + + + !$acc cache (a) ! { dg-error "inside of loop" } + + do i = 1,10 + !$acc cache(a) + enddo + + do i = 1,10 + a(i) = i + !$acc cache(a) + enddo + +end subroutine test1 +end module test +! { dg-prune-output "Deleted" } +! { dg-prune-output "ACC cache unimplemented" } diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-2.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-2.f95 new file mode 100644 index 00000000000..f85691eb8e3 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/loop-2.f95 @@ -0,0 +1,649 @@ +! { dg-do compile } +! { dg-additional-options "-fmax-errors=100" } + +! TODO: nested kernels are allowed in 2.0 + +program test + implicit none + integer :: i, j + + !$acc kernels + !$acc loop auto + DO i = 1,10 + ENDDO + !$acc loop gang + DO i = 1,10 + ENDDO + !$acc loop gang(5) + DO i = 1,10 + ENDDO + !$acc loop gang(num:5) + DO i = 1,10 + ENDDO + !$acc loop gang(static:5) + DO i = 1,10 + ENDDO + !$acc loop gang(static:*) + DO i = 1,10 + ENDDO + !$acc loop gang + DO i = 1,10 + !$acc loop vector + DO j = 1,10 + ENDDO + !$acc loop worker + DO j = 1,10 + ENDDO + !$acc loop gang ! { dg-error "not allowed" } + DO j = 1,10 + ENDDO + ENDDO + !$acc loop seq gang ! 
{ dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc loop worker + DO i = 1,10 + ENDDO + !$acc loop worker(5) + DO i = 1,10 + ENDDO + !$acc loop worker(num:5) + DO i = 1,10 + ENDDO + !$acc loop worker + DO i = 1,10 + !$acc loop vector + DO j = 1,10 + ENDDO + !$acc loop worker ! { dg-error "not allowed" } + DO j = 1,10 + ENDDO + !$acc loop gang ! { dg-error "not allowed" } + DO j = 1,10 + ENDDO + ENDDO + !$acc loop seq worker ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc loop gang worker ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc loop vector + DO i = 1,10 + ENDDO + !$acc loop vector(5) + DO i = 1,10 + ENDDO + !$acc loop vector(length:5) + DO i = 1,10 + ENDDO + !$acc loop vector + DO i = 1,10 + !$acc loop vector ! { dg-error "not allowed" } + DO j = 1,10 + ENDDO + !$acc loop worker ! { dg-error "not allowed" } + DO j = 1,10 + ENDDO + !$acc loop gang ! { dg-error "not allowed" } + DO j = 1,10 + ENDDO + ENDDO + !$acc loop seq vector ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc loop gang vector ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc loop worker vector ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc loop auto + DO i = 1,10 + ENDDO + !$acc loop seq auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc loop gang auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc loop worker auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc loop vector auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc loop tile ! { dg-error "Unclassifiable" } + DO i = 1,10 + ENDDO + !$acc loop tile() ! 
{ dg-error "Syntax error" } + DO i = 1,10 + ENDDO + !$acc loop tile(1) + DO i = 1,10 + ENDDO + !$acc loop tile(2) + DO i = 1,10 + ENDDO + !$acc loop tile(6-2) + DO i = 1,10 + ENDDO + !$acc loop tile(6+2) + DO i = 1,10 + ENDDO + !$acc loop tile(*) + DO i = 1,10 + ENDDO + !$acc loop tile(*, 1) + DO i = 1,10 + DO j = 1,10 + ENDDO + ENDDO + !$acc loop tile(-1) ! { dg-warning "must be positive" } + do i = 1,10 + enddo + !$acc loop tile(i) ! { dg-error "constant expression" } + do i = 1,10 + enddo + !$acc loop tile(2, 2, 1) ! { dg-error "not enough DO loops for tiled" } + do i = 1, 3 + do j = 4, 6 + end do + end do + !$acc loop tile(2, 2) + do i = 1, 5, 2 + do j = i + 1, 7, i ! { dg-error "tiled loops don.t form rectangular iteration space" } + end do + end do + !$acc loop vector tile(*) + DO i = 1,10 + ENDDO + !$acc loop worker tile(*) + DO i = 1,10 + ENDDO + !$acc loop gang tile(*) + DO i = 1,10 + ENDDO + !$acc loop vector gang tile(*) + DO i = 1,10 + ENDDO + !$acc loop vector worker tile(*) + DO i = 1,10 + ENDDO + !$acc loop gang worker tile(*) + DO i = 1,10 + ENDDO + !$acc end kernels + + + !$acc parallel + !$acc loop auto + DO i = 1,10 + ENDDO + !$acc loop gang + DO i = 1,10 + ENDDO + !$acc loop gang(5) ! { dg-error "non-static" } + DO i = 1,10 + ENDDO + !$acc loop gang(num:5) ! { dg-error "non-static" } + DO i = 1,10 + ENDDO + !$acc loop gang(static:5) + DO i = 1,10 + ENDDO + !$acc loop gang(static:*) + DO i = 1,10 + ENDDO + !$acc loop gang + DO i = 1,10 + !$acc loop vector + DO j = 1,10 + ENDDO + !$acc loop worker + DO j = 1,10 + ENDDO + !$acc loop gang ! { dg-error "not allowed" } + DO j = 1,10 + ENDDO + ENDDO + !$acc loop seq gang ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc loop worker + DO i = 1,10 + ENDDO + !$acc loop worker(5) ! { dg-error "non-static" } + DO i = 1,10 + ENDDO + !$acc loop worker(num:5) ! 
{ dg-error "non-static" } + DO i = 1,10 + ENDDO + !$acc loop worker + DO i = 1,10 + !$acc loop vector + DO j = 1,10 + ENDDO + !$acc loop worker ! { dg-error "not allowed" } + DO j = 1,10 + ENDDO + !$acc loop gang ! { dg-error "not allowed" } + DO j = 1,10 + ENDDO + ENDDO + !$acc loop seq worker ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc loop gang worker ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc loop vector + DO i = 1,10 + ENDDO + !$acc loop vector(5) + DO i = 1,10 + ENDDO + !$acc loop vector(length:5) + DO i = 1,10 + ENDDO + !$acc loop vector + DO i = 1,10 + !$acc loop vector ! { dg-error "not allowed" } + DO j = 1,10 + ENDDO + !$acc loop worker ! { dg-error "not allowed" } + DO j = 1,10 + ENDDO + !$acc loop gang ! { dg-error "not allowed" } + DO j = 1,10 + ENDDO + ENDDO + !$acc loop seq vector ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc loop gang vector ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc loop worker vector ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc loop auto + DO i = 1,10 + ENDDO + !$acc loop seq auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc loop gang auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc loop worker auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc loop vector auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc loop tile ! { dg-error "Unclassifiable" } + DO i = 1,10 + ENDDO + !$acc loop tile() ! { dg-error "Syntax error" } + DO i = 1,10 + ENDDO + !$acc loop tile(1) + DO i = 1,10 + ENDDO + !$acc loop tile(*) + DO i = 1,10 + ENDDO + !$acc loop tile(2) + DO i = 1,10 + DO j = 1,10 + ENDDO + ENDDO + !$acc loop tile(-1) ! { dg-warning "must be positive" } + do i = 1,10 + enddo + !$acc loop tile(i) ! { dg-error "constant expression" } + do i = 1,10 + enddo + !$acc loop tile(2, 2, 1) ! 
{ dg-error "not enough DO loops for tiled" } + do i = 1, 3 + do j = 4, 6 + end do + end do + !$acc loop tile(2, 2) + do i = 1, 5, 2 + do j = i + 1, 7, i ! { dg-error "tiled loops don.t form rectangular iteration space" } + end do + end do + !$acc loop vector tile(*) + DO i = 1,10 + ENDDO + !$acc loop worker tile(*) + DO i = 1,10 + ENDDO + !$acc loop gang tile(*) + DO i = 1,10 + ENDDO + !$acc loop vector gang tile(*) + DO i = 1,10 + ENDDO + !$acc loop vector worker tile(*) + DO i = 1,10 + ENDDO + !$acc loop gang worker tile(*) + DO i = 1,10 + ENDDO + !$acc end parallel + + !$acc kernels loop auto + DO i = 1,10 + ENDDO + !$acc kernels loop gang + DO i = 1,10 + ENDDO + !$acc kernels loop gang(5) + DO i = 1,10 + ENDDO + !$acc kernels loop gang(num:5) + DO i = 1,10 + ENDDO + !$acc kernels loop gang(static:5) + DO i = 1,10 + ENDDO + !$acc kernels loop gang(static:*) + DO i = 1,10 + ENDDO + !$acc kernels loop gang + DO i = 1,10 + !$acc kernels loop gang + DO j = 1,10 + ENDDO + ENDDO + !$acc kernels loop seq gang ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc kernels loop worker + DO i = 1,10 + ENDDO + !$acc kernels loop worker(5) + DO i = 1,10 + ENDDO + !$acc kernels loop worker(num:5) + DO i = 1,10 + ENDDO + !$acc kernels loop worker + DO i = 1,10 + !$acc kernels loop worker + DO j = 1,10 + ENDDO + !$acc kernels loop gang + DO j = 1,10 + ENDDO + ENDDO + !$acc kernels loop seq worker ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc kernels loop gang worker ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc kernels loop vector + DO i = 1,10 + ENDDO + !$acc kernels loop vector(5) + DO i = 1,10 + ENDDO + !$acc kernels loop vector(length:5) + DO i = 1,10 + ENDDO + !$acc kernels loop vector + DO i = 1,10 + !$acc kernels loop vector + DO j = 1,10 + ENDDO + !$acc kernels loop worker + DO j = 1,10 + ENDDO + !$acc kernels loop gang + DO j = 1,10 + ENDDO + ENDDO + !$acc kernels loop seq vector ! 
{ dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc kernels loop gang vector ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc kernels loop worker vector ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc kernels loop auto + DO i = 1,10 + ENDDO + !$acc kernels loop seq auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc kernels loop gang auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc kernels loop worker auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc kernels loop vector auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc kernels loop tile ! { dg-error "Unclassifiable" } + DO i = 1,10 + ENDDO + !$acc kernels loop tile() ! { dg-error "Syntax error" } + DO i = 1,10 + ENDDO + !$acc kernels loop tile(1) + DO i = 1,10 + ENDDO + !$acc kernels loop tile(*) + DO i = 1,10 + ENDDO + !$acc kernels loop tile(*, 1) + DO i = 1,10 + DO j = 1,10 + ENDDO + ENDDO + !$acc kernels loop tile(-1) ! { dg-warning "must be positive" } + do i = 1,10 + enddo + !$acc kernels loop tile(i) ! { dg-error "constant expression" } + do i = 1,10 + enddo + !$acc kernels loop tile(2, 2, 1) ! { dg-error "not enough DO loops for tiled" } + do i = 1, 3 + do j = 4, 6 + end do + end do + !$acc kernels loop tile(2, 2) + do i = 1, 5, 2 + do j = i + 1, 7, i ! { dg-error "tiled loops don.t form rectangular iteration space" } + end do + end do + !$acc kernels loop vector tile(*) + DO i = 1,10 + ENDDO + !$acc kernels loop worker tile(*) + DO i = 1,10 + ENDDO + !$acc kernels loop gang tile(*) + DO i = 1,10 + ENDDO + !$acc kernels loop vector gang tile(*) + DO i = 1,10 + ENDDO + !$acc kernels loop vector worker tile(*) + DO i = 1,10 + ENDDO + !$acc kernels loop gang worker tile(*) + DO i = 1,10 + ENDDO + + !$acc parallel loop auto + DO i = 1,10 + ENDDO + !$acc parallel loop gang + DO i = 1,10 + ENDDO + !$acc parallel loop gang(5) ! { dg-error "non-static" } + DO i = 1,10 + ENDDO + !$acc parallel loop gang(num:5) ! 
{ dg-error "non-static" } + DO i = 1,10 + ENDDO + !$acc parallel loop gang(static:5) + DO i = 1,10 + ENDDO + !$acc parallel loop gang(static:*) + DO i = 1,10 + ENDDO + !$acc parallel loop gang + DO i = 1,10 + !$acc parallel loop gang + DO j = 1,10 + ENDDO + ENDDO + !$acc parallel loop seq gang ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc parallel loop worker + DO i = 1,10 + ENDDO + !$acc parallel loop worker(5) ! { dg-error "non-static" } + DO i = 1,10 + ENDDO + !$acc parallel loop worker(num:5) ! { dg-error "non-static" } + DO i = 1,10 + ENDDO + !$acc parallel loop worker + DO i = 1,10 + !$acc parallel loop worker + DO j = 1,10 + ENDDO + !$acc parallel loop gang + DO j = 1,10 + ENDDO + ENDDO + !$acc parallel loop seq worker ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc parallel loop gang worker ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc parallel loop vector + DO i = 1,10 + ENDDO + !$acc parallel loop vector(5) + DO i = 1,10 + ENDDO + !$acc parallel loop vector(length:5) + DO i = 1,10 + ENDDO + !$acc parallel loop vector + DO i = 1,10 + !$acc parallel loop vector + DO j = 1,10 + ENDDO + !$acc parallel loop worker + DO j = 1,10 + ENDDO + !$acc parallel loop gang + DO j = 1,10 + ENDDO + ENDDO + !$acc parallel loop seq vector ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc parallel loop gang vector ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc parallel loop worker vector ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc parallel loop auto + DO i = 1,10 + ENDDO + !$acc parallel loop seq auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc parallel loop gang auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc parallel loop worker auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + !$acc parallel loop vector auto ! { dg-error "conflicts with" } + DO i = 1,10 + ENDDO + + !$acc parallel loop tile ! 
{ dg-error "Unclassifiable" } + DO i = 1,10 + ENDDO + !$acc parallel loop tile() ! { dg-error "Syntax error" } + DO i = 1,10 + ENDDO + !$acc parallel loop tile(1) + DO i = 1,10 + ENDDO + !$acc parallel loop tile(*) + DO i = 1,10 + ENDDO + !$acc parallel loop tile(*, 1) + DO i = 1,10 + DO j = 1,10 + ENDDO + ENDDO + !$acc parallel loop tile(-1) ! { dg-warning "must be positive" } + do i = 1,10 + enddo + !$acc parallel loop tile(i) ! { dg-error "constant expression" } + do i = 1,10 + enddo + !$acc parallel loop tile(2, 2, 1) ! { dg-error "not enough DO loops for tiled" } + do i = 1, 3 + do j = 4, 6 + end do + end do + !$acc parallel loop tile(2, 2) + do i = 1, 5, 2 + do j = i + 1, 7, i ! { dg-error "tiled loops don.t form rectangular iteration space" } + end do + end do + !$acc parallel loop vector tile(*) + DO i = 1,10 + ENDDO + !$acc parallel loop worker tile(*) + DO i = 1,10 + ENDDO + !$acc parallel loop gang tile(*) + DO i = 1,10 + ENDDO + !$acc parallel loop vector gang tile(*) + DO i = 1,10 + ENDDO + !$acc parallel loop vector worker tile(*) + DO i = 1,10 + ENDDO + !$acc parallel loop gang worker tile(*) + DO i = 1,10 + ENDDO +end \ No newline at end of file diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-3.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-3.f95 new file mode 100644 index 00000000000..2a866c79234 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/loop-3.f95 @@ -0,0 +1,55 @@ +! { dg-do compile } +! { dg-additional-options "-std=f2008" } + +subroutine test1 + implicit none + integer :: i, j + + ! !$acc end loop not required by spec + !$acc loop + do i = 1,5 + enddo + !$acc end loop ! { dg-warning "Redundant" } + + !$acc loop + do i = 1,5 + enddo + j = 1 + !$acc end loop ! { dg-error "Unexpected" } + + !$acc parallel + !$acc loop + do i = 1,5 + enddo + !$acc end parallel + !$acc end loop ! { dg-error "Unexpected" } + + ! 
OpenACC supports Fortran 2008 do concurrent statement + !$acc loop + do concurrent (i = 1:5) + end do + + !$acc loop + outer_loop: do i = 1, 5 + inner_loop: do j = 1,5 + if (i .eq. j) cycle outer_loop + if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" } + end do inner_loop + end do outer_loop + + outer_loop1: do i = 1, 5 + !$acc loop + inner_loop1: do j = 1,5 + if (i .eq. j) cycle outer_loop1 ! { dg-error "CYCLE statement" } + end do inner_loop1 + end do outer_loop1 + + !$acc loop collapse(2) + outer_loop2: do i = 1, 5 + inner_loop2: do j = 1,5 + if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" } + if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" } + end do inner_loop2 + end do outer_loop2 +end subroutine test1 + diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-tree-1.f90 b/gcc/testsuite/gfortran.dg/goacc/loop-tree-1.f90 new file mode 100644 index 00000000000..966e75bcde3 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/loop-tree-1.f90 @@ -0,0 +1,48 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original -std=f2008" } + +! test for tree-dump-original and spaces-commas + +program test + implicit none + integer :: i, j, k, m, sum + REAL :: a(64), b(64), c(64) + + !$acc kernels + !$acc loop collapse(2) + DO i = 1,10 + DO j = 1,10 + ENDDO + ENDDO + + !$acc loop independent gang (3) + DO i = 1,10 + !$acc loop worker(3) ! { dg-error "work-sharing region may not be closely nested inside of work-sharing, critical, ordered, master or explicit task region" } + DO j = 1,10 + !$acc loop vector(5) + DO k = 1,10 + ENDDO + ENDDO + ENDDO + !$acc end kernels + + sum = 0 + !$acc parallel + !$acc loop private(m) reduction(+:sum) + DO i = 1,10 + sum = sum + 1 + ENDDO + !$acc end parallel + +end program test +! { dg-final { scan-tree-dump-times "pragma acc loop" 5 "original" } } + +! { dg-final { scan-tree-dump-times "collapse\\(2\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "independent" 1 "original" } } +! 
{ dg-final { scan-tree-dump-times "gang\\(num: 3\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "worker\\(3\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "vector\\(5\\)" 1 "original" } } + +! { dg-final { scan-tree-dump-times "private\\(m\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "reduction\\(\\+:sum\\)" 1 "original" } } +! { dg-final { cleanup-tree-dump "original" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/omp.f95 b/gcc/testsuite/gfortran.dg/goacc/omp.f95 new file mode 100644 index 00000000000..24f639ff54a --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/omp.f95 @@ -0,0 +1,66 @@ +! { dg-do compile } +! { dg-additional-options "-fopenmp" } + +module test +contains + subroutine ichi + implicit none + integer :: i + !$acc parallel + !$omp do ! { dg-error "cannot be specified" } + do i = 1,5 + enddo + !$acc end parallel + end subroutine ichi + + subroutine ni + implicit none + integer :: i + !$omp parallel + !$acc loop ! { dg-error "cannot be specified" } + do i = 1,5 + enddo + !$omp end parallel + end subroutine ni + + subroutine san + implicit none + integer :: i + !$omp do + !$acc loop ! { dg-error "Unexpected" } + do i = 1,5 + enddo + end subroutine san + + subroutine yon + implicit none + integer :: i + !$acc loop + !$omp do ! { dg-error "Expected DO loop" } + do i = 1,5 + enddo + end subroutine yon + + subroutine go + implicit none + integer :: i, j + + !$omp parallel + do i = 1,5 + !$acc kernels ! { dg-error "cannot be specified" } + do j = 1,5 + enddo + !$acc end kernels + enddo + !$omp end parallel + end subroutine go + + subroutine roku + implicit none + + !$acc data + !$omp parallel ! 
{ dg-error "cannot be specified" } + !$omp end parallel + !$acc end data + end subroutine roku +end module test \ No newline at end of file diff --git a/gcc/testsuite/gfortran.dg/goacc/parallel-kernels-clauses.f95 b/gcc/testsuite/gfortran.dg/goacc/parallel-kernels-clauses.f95 new file mode 100644 index 00000000000..c37208c7e6f --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/parallel-kernels-clauses.f95 @@ -0,0 +1,96 @@ +! { dg-do compile } +! { dg-additional-options "-fmax-errors=100" } + +! test clauses added in OpenACC ver 2.0 + +program test + implicit none + integer :: i, a(10), b(5:7) + integer, parameter :: acc_async_noval = -1 + integer, parameter :: acc_async_sync = -2 + logical :: l + + ! async + !$acc kernels async(i) + !$acc end kernels + !$acc parallel async(i) + !$acc end parallel + + !$acc kernels async(0, 1) { dg-error "Unclassifiable" } + !$acc parallel async(0, 1) { dg-error "Unclassifiable" } + + !$acc kernels async + !$acc end kernels + !$acc parallel async + !$acc end parallel + + !$acc kernels async(acc_async_noval) + !$acc end kernels + !$acc parallel async(acc_async_noval) + !$acc end parallel + + !$acc kernels async(acc_async_sync) + !$acc end kernels + !$acc parallel async(acc_async_sync) + !$acc end parallel + + !$acc kernels async() { dg-error "Invalid character" } + !$acc parallel async() { dg-error "Invalid character" } + + !$acc kernels async("a") { dg-error "Unclassifiable" } + !$acc parallel async("a") { dg-error "Unclassifiable" } + + !$acc kernels async(.true.) { dg-error "Unclassifiable" } + !$acc parallel async(.true.) { dg-error "Unclassifiable" } + + ! 
default(none) + !$acc kernels default(none) + !$acc end kernels + !$acc parallel default(none) + !$acc end parallel + + !$acc kernels default (none) + !$acc end kernels + !$acc parallel default (none) + !$acc end parallel + + !$acc kernels default ( none ) + !$acc end kernels + !$acc parallel default ( none ) + !$acc end parallel + + !$acc kernels default { dg-error "Unclassifiable" } + !$acc parallel default { dg-error "Unclassifiable" } + + !$acc kernels default() { dg-error "Unclassifiable" } + !$acc parallel default() { dg-error "Unclassifiable" } + + !$acc kernels default(i) { dg-error "Unclassifiable" } + !$acc parallel default(i) { dg-error "Unclassifiable" } + + !$acc kernels default(1) { dg-error "Unclassifiable" } + !$acc parallel default(1) { dg-error "Unclassifiable" } + + ! Wait + !$acc kernels wait (l) ! { dg-error "INTEGER" } + !$acc end kernels + !$acc kernels wait (.true.) ! { dg-error "INTEGER" } + !$acc end kernels + !$acc kernels wait (i, 1) + !$acc end kernels + !$acc kernels wait (a) ! { dg-error "INTEGER" } + !$acc end kernels + !$acc kernels wait (b(5:6)) ! { dg-error "INTEGER" } + !$acc end kernels + + !$acc parallel wait (l) ! { dg-error "INTEGER" } + !$acc end parallel + !$acc parallel wait (.true.) ! { dg-error "INTEGER" } + !$acc end parallel + !$acc parallel wait (i, 1) + !$acc end parallel + !$acc parallel wait (a) ! { dg-error "INTEGER" } + !$acc end parallel + !$acc parallel wait (b(5:6)) ! { dg-error "INTEGER" } + !$acc end parallel +end diff --git a/gcc/testsuite/gfortran.dg/goacc/parallel-kernels-regions.f95 b/gcc/testsuite/gfortran.dg/goacc/parallel-kernels-regions.f95 new file mode 100644 index 00000000000..8b8e9893995 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/parallel-kernels-regions.f95 @@ -0,0 +1,55 @@ +! { dg-do compile } + +! OpenACC 2.0 allows nested parallel/kernels regions, but this is not yet +! supported. + +program test + implicit none + + integer :: i + + !$acc parallel + !$acc kernels ! 
{ dg-bogus "kernels construct inside of parallel region" "not implemented" { xfail *-*-* } } + !$acc end kernels + !$acc end parallel + + !$acc parallel + !$acc parallel ! { dg-bogus "parallel construct inside of parallel region" "not implemented" { xfail *-*-* } } + !$acc end parallel + !$acc end parallel + + !$acc parallel + !$acc parallel ! { dg-bogus "parallel construct inside of parallel region" "not implemented" { xfail *-*-* } } + !$acc end parallel + !$acc kernels ! { dg-bogus "kernels construct inside of parallel region" "not implemented" { xfail *-*-* } } + !$acc end kernels + !$acc end parallel + + !$acc kernels + !$acc kernels ! { dg-bogus "kernels construct inside of kernels region" "not implemented" { xfail *-*-* } } + !$acc end kernels + !$acc end kernels + + !$acc kernels + !$acc parallel ! { dg-bogus "parallel construct inside of kernels region" "not implemented" { xfail *-*-* } } + !$acc end parallel + !$acc end kernels + + !$acc kernels + !$acc parallel ! { dg-bogus "parallel construct inside of kernels region" "not implemented" { xfail *-*-* } } + !$acc end parallel + !$acc kernels ! { dg-bogus "kernels construct inside of kernels region" "not implemented" { xfail *-*-* } } + !$acc end kernels + !$acc end kernels + + !$acc parallel + !$acc data ! { dg-error "data construct inside of parallel region" } + !$acc end data + !$acc end parallel + + !$acc kernels + !$acc data ! { dg-error "data construct inside of kernels region" } + !$acc end data + !$acc end kernels + +end program test diff --git a/gcc/testsuite/gfortran.dg/goacc/parallel-tree.f95 b/gcc/testsuite/gfortran.dg/goacc/parallel-tree.f95 new file mode 100644 index 00000000000..48061b112fe --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/parallel-tree.f95 @@ -0,0 +1,41 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original" } + +! 
test for tree-dump-original and spaces-commas + +program test + implicit none + integer :: q, i, j, k, m, n, o, p, r, s, t, u, v, w + logical :: l + + !$acc parallel if(l) async num_gangs(i) num_workers(i) vector_length(i) & + !$acc reduction(max:q), copy(i), copyin(j), copyout(k), create(m) & + !$acc present(o), pcopy(p), pcopyin(r), pcopyout(s), pcreate(t) & + !$acc deviceptr(u), private(v), firstprivate(w) + !$acc end parallel + +end program test +! { dg-final { scan-tree-dump-times "pragma acc parallel" 1 "original" } } + +! { dg-final { scan-tree-dump-times "if" 1 "original" } } +! { dg-final { scan-tree-dump-times "async" 1 "original" } } +! { dg-final { scan-tree-dump-times "num_gangs" 1 "original" } } +! { dg-final { scan-tree-dump-times "num_workers" 1 "original" } } +! { dg-final { scan-tree-dump-times "vector_length" 1 "original" } } + +! { dg-final { scan-tree-dump-times "reduction\\(max:q\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(force_tofrom:i\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(force_to:j\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(force_from:k\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(force_alloc:m\\)" 1 "original" } } + +! { dg-final { scan-tree-dump-times "map\\(force_present:o\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(tofrom:p\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(to:r\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(from:s\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "map\\(alloc:t\\)" 1 "original" } } + +! { dg-final { scan-tree-dump-times "map\\(force_deviceptr:u\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "private\\(v\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "firstprivate\\(w\\)" 1 "original" } } +! 
{ dg-final { cleanup-tree-dump "original" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/parameter.f95 b/gcc/testsuite/gfortran.dg/goacc/parameter.f95 new file mode 100644 index 00000000000..1364181b33c --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/parameter.f95 @@ -0,0 +1,32 @@ +! { dg-do compile } + +module test +contains + subroutine oacc1 + implicit none + integer :: i + integer, parameter :: a = 1 + !$acc declare device_resident (a) ! { dg-error "PARAMETER" } + !$acc data copy (a) ! { dg-error "not a variable" } + !$acc end data + !$acc data deviceptr (a) ! { dg-error "not a variable" } + !$acc end data + !$acc parallel private (a) ! { dg-error "not a variable" } + !$acc end parallel + !$acc host_data use_device (a) ! { dg-error "not a variable" } + !$acc end host_data + !$acc parallel loop reduction(+:a) ! { dg-error "not a variable" } + do i = 1,5 + enddo + !$acc end parallel loop + !$acc parallel loop + do i = 1,5 + !$acc cache (a) ! TODO: This must fail, as in openacc-1_0-branch + enddo + !$acc end parallel loop + !$acc update device (a) ! { dg-error "not a variable" } + !$acc update host (a) ! { dg-error "not a variable" } + !$acc update self (a) ! { dg-error "not a variable" } + end subroutine oacc1 +end module test +! { dg-prune-output "unimplemented" } diff --git a/gcc/testsuite/gfortran.dg/goacc/private-1.f95 b/gcc/testsuite/gfortran.dg/goacc/private-1.f95 new file mode 100644 index 00000000000..23ce95ad8d2 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/private-1.f95 @@ -0,0 +1,37 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-omplower" } + +! 
test for implicit private clauses in do loops + +program test + implicit none + integer :: i, j, k + + !$acc parallel + !$acc loop + do i = 1, 100 + end do + !$acc end parallel + + !$acc parallel + !$acc loop + do i = 1, 100 + do j = 1, 100 + end do + end do + !$acc end parallel + + !$acc parallel + !$acc loop + do i = 1, 100 + do j = 1, 100 + do k = 1, 100 + end do + end do + end do + !$acc end parallel +end program test +! { dg-final { scan-tree-dump-times "pragma omp target oacc_parallel" 3 "omplower" } } +! { dg-final { scan-tree-dump-times "private\\(i\\)" 3 "omplower" } } +! { dg-final { scan-tree-dump-times "private\\(j\\)" 2 "omplower" } } +! { dg-final { scan-tree-dump-times "private\\(k\\)" 1 "omplower" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/private-2.f95 b/gcc/testsuite/gfortran.dg/goacc/private-2.f95 new file mode 100644 index 00000000000..4b038f2b5f2 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/private-2.f95 @@ -0,0 +1,39 @@ +! { dg-do compile } + +! test for implicit private clauses in do loops + +program test + implicit none + integer :: i, j, k, a(10) + + !$acc parallel + !$acc loop + do i = 1, 100 + end do + !$acc end parallel + + !$acc parallel + !$acc loop + do i = 1, 100 + do j = 1, 100 + end do + end do + !$acc end parallel + + !$acc data copy(a) + + if(mod(1,10) .eq. 0) write(*,'(i5)') i + + do i = 1, 100 + !$acc parallel + !$acc loop + do j = 1, 100 + do k = 1, 100 + end do + end do + !$acc end parallel + end do + + !$acc end data + +end program test diff --git a/gcc/testsuite/gfortran.dg/goacc/private-3.f95 b/gcc/testsuite/gfortran.dg/goacc/private-3.f95 new file mode 100644 index 00000000000..aa12a56f110 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/private-3.f95 @@ -0,0 +1,23 @@ +! { dg-do compile } + +! test for private variables in a reduction clause + +program test + implicit none + integer, parameter :: n = 100 + integer :: i, k + +! FIXME: This causes an ICE in the gimplifier. +! 
!$acc parallel private (k) reduction (+:k) +! do i = 1, n +! k = k + 1 +! end do +! !$acc end parallel + + !$acc parallel private (k) + !$acc loop reduction (+:k) + do i = 1, n + k = k + 1 + end do + !$acc end parallel +end program test diff --git a/gcc/testsuite/gfortran.dg/goacc/pure-elemental-procedures.f95 b/gcc/testsuite/gfortran.dg/goacc/pure-elemental-procedures.f95 new file mode 100644 index 00000000000..726e8e98b22 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/pure-elemental-procedures.f95 @@ -0,0 +1,78 @@ +! { dg-do compile } +! { dg-additional-options "-std=f2008 -fcoarray=single" } + +module test + implicit none +contains + elemental subroutine test1 + !$acc parallel ! { dg-error "may not appear in PURE procedures" } + end subroutine test1 + + pure subroutine test2 + !$acc parallel ! { dg-error "may not appear in PURE procedures" } + end subroutine test2 + + ! Implicit pure + elemental real function test3(x) + real, intent(in) :: x + !$acc parallel ! { dg-error "may not appear in PURE procedures" } + test3 = x*x + end function test3 + + pure real function test4(x) + real, intent(in) :: x + !$acc parallel ! { dg-error "may not appear in PURE procedures" } + test4 = x + end function test4 + + subroutine test5 + real :: x = 0.0 + integer :: i + !$acc parallel loop collapse(1) reduction(+:x) + do i = 1,10 + x = x + 0.3 + enddo + print *, x + end subroutine test5 + + real function test6(x) + real :: x + integer :: i + !$acc parallel loop collapse(1) reduction(+:x) + do i = 1,10 + x = x + 0.3 + enddo + test6 = x + end function test6 + + impure elemental real function test7(x) + real, intent(in) :: x + !$acc parallel + test7 = x + !$acc end parallel + end function test7 + + subroutine test8 + real :: x = 0.0 + integer :: i + !$acc parallel loop collapse(1) reduction(+:x) + do i = 1,10 + critical ! 
{ dg-error "CRITICAL block inside of" } + x = x + 0.3 + end critical + enddo + print *, x + end subroutine test8 + + real function test9(n) + integer, value :: n + BLOCK + integer i + real sum + !$acc loop reduction(+:sum) + do i=1, n + sum = sum + sin(real(i)) + end do + END BLOCK + end function test9 +end module test \ No newline at end of file diff --git a/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 b/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 new file mode 100644 index 00000000000..ffcec70c7ac --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 @@ -0,0 +1,21 @@ +! { dg-do compile } + +program reduction + integer, parameter :: n = 40, c = 10 + integer :: i, sum + + call redsub (sum, n, c) +end program reduction + +subroutine redsub(sum, n, c) + integer :: sum, n, c + + sum = 0 + + !$acc parallel vector_length(n) copyin (n, c) + !$acc loop reduction(+:sum) + do i = 1, n + sum = sum + c + end do + !$acc end parallel +end subroutine redsub diff --git a/gcc/testsuite/gfortran.dg/goacc/reduction.f95 b/gcc/testsuite/gfortran.dg/goacc/reduction.f95 new file mode 100644 index 00000000000..833230ade80 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/reduction.f95 @@ -0,0 +1,138 @@ +! { dg-do compile } +! 
{ dg-additional-options "-fmax-errors=100" } + +subroutine foo (ia1) +integer :: i1, i2, i3 +integer, dimension (*) :: ia1 +integer, dimension (10) :: ia2 +real :: r1 +real, dimension (5) :: ra1 +double precision :: d1 +double precision, dimension (4) :: da1 +complex :: c1 +complex, dimension (7) :: ca1 +logical :: l1 +logical, dimension (3) :: la1 +character (5) :: a1 +type t + integer :: i +end type +type(t) :: t1 +type(t), dimension (2) :: ta1 +real, pointer :: p1 => NULL() +integer, allocatable :: aa1 (:,:) +save i2 +common /blk/ i1 + +!$acc parallel reduction (+:ia2) +!$acc end parallel +!$acc parallel reduction (+:ra1) +!$acc end parallel +!$acc parallel reduction (+:ca1) +!$acc end parallel +!$acc parallel reduction (+:da1) +!$acc end parallel +!$acc parallel reduction (.and.:la1) +!$acc end parallel +!$acc parallel reduction (+:i3, r1, d1, c1) +!$acc end parallel +!$acc parallel reduction (*:i3, r1, d1, c1) +!$acc end parallel +!$acc parallel reduction (-:i3, r1, d1, c1) +!$acc end parallel +!$acc parallel reduction (.and.:l1) +!$acc end parallel +!$acc parallel reduction (.or.:l1) +!$acc end parallel +!$acc parallel reduction (.eqv.:l1) +!$acc end parallel +!$acc parallel reduction (.neqv.:l1) +!$acc end parallel +!$acc parallel reduction (min:i3, r1, d1) +!$acc end parallel +!$acc parallel reduction (max:i3, r1, d1) +!$acc end parallel +!$acc parallel reduction (iand:i3) +!$acc end parallel +!$acc parallel reduction (ior:i3) +!$acc end parallel +!$acc parallel reduction (ieor:i3) +!$acc end parallel +!$acc parallel reduction (+:/blk/) ! { dg-error "Syntax error" } +!$acc end parallel ! { dg-error "Unexpected" } +!$acc parallel reduction (*:p1) ! { dg-error "POINTER object" } +!$acc end parallel +!$acc parallel reduction (-:aa1) +!$acc end parallel +!$acc parallel reduction (*:ia1) ! { dg-error "Assumed size" } +!$acc end parallel +!$acc parallel reduction (+:l1) ! 
{ dg-error "OMP DECLARE REDUCTION \\+ not found for type LOGICAL" } +!$acc end parallel +!$acc parallel reduction (*:la1) ! { dg-error "OMP DECLARE REDUCTION \\* not found for type LOGICAL" } +!$acc end parallel +!$acc parallel reduction (-:a1) ! { dg-error "OMP DECLARE REDUCTION - not found for type CHARACTER" } +!$acc end parallel +!$acc parallel reduction (+:t1) ! { dg-error "OMP DECLARE REDUCTION \\+ not found for type TYPE" } +!$acc end parallel +!$acc parallel reduction (*:ta1) ! { dg-error "OMP DECLARE REDUCTION \\* not found for type TYPE" } +!$acc end parallel +!$acc parallel reduction (.and.:i3) ! { dg-error "OMP DECLARE REDUCTION \\.and\\. not found for type INTEGER" } +!$acc end parallel +!$acc parallel reduction (.or.:ia2) ! { dg-error "OMP DECLARE REDUCTION \\.or\\. not found for type INTEGER" } +!$acc end parallel +!$acc parallel reduction (.eqv.:r1) ! { dg-error "OMP DECLARE REDUCTION \\.eqv\\. not found for type REAL" } +!$acc end parallel +!$acc parallel reduction (.neqv.:ra1) ! { dg-error "OMP DECLARE REDUCTION \\.neqv\\. not found for type REAL" } +!$acc end parallel +!$acc parallel reduction (.and.:d1) ! { dg-error "OMP DECLARE REDUCTION \\.and\\. not found for type REAL" } +!$acc end parallel +!$acc parallel reduction (.or.:da1) ! { dg-error "OMP DECLARE REDUCTION \\.or\\. not found for type REAL" } +!$acc end parallel +!$acc parallel reduction (.eqv.:c1) ! { dg-error "OMP DECLARE REDUCTION \\.eqv\\. not found for type COMPLEX" } +!$acc end parallel +!$acc parallel reduction (.neqv.:ca1) ! { dg-error "OMP DECLARE REDUCTION \\.neqv\\. not found for type COMPLEX" } +!$acc end parallel +!$acc parallel reduction (.and.:a1) ! { dg-error "OMP DECLARE REDUCTION \\.and\\. not found for type CHARACTER" } +!$acc end parallel +!$acc parallel reduction (.or.:t1) ! { dg-error "OMP DECLARE REDUCTION \\.or\\. not found for type TYPE" } +!$acc end parallel +!$acc parallel reduction (.eqv.:ta1) ! { dg-error "OMP DECLARE REDUCTION \\.eqv\\. 
not found for type TYPE" } +!$acc end parallel +!$acc parallel reduction (min:c1) ! { dg-error "OMP DECLARE REDUCTION min not found for type COMPLEX" } +!$acc end parallel +!$acc parallel reduction (max:ca1) ! { dg-error "OMP DECLARE REDUCTION max not found for type COMPLEX" } +!$acc end parallel +!$acc parallel reduction (max:l1) ! { dg-error "OMP DECLARE REDUCTION max not found for type LOGICAL" } +!$acc end parallel +!$acc parallel reduction (min:la1) ! { dg-error "OMP DECLARE REDUCTION min not found for type LOGICAL" } +!$acc end parallel +!$acc parallel reduction (max:a1) ! { dg-error "OMP DECLARE REDUCTION max not found for type CHARACTER" } +!$acc end parallel +!$acc parallel reduction (min:t1) ! { dg-error "OMP DECLARE REDUCTION min not found for type TYPE" } +!$acc end parallel +!$acc parallel reduction (max:ta1) ! { dg-error "OMP DECLARE REDUCTION max not found for type TYPE" } +!$acc end parallel +!$acc parallel reduction (iand:r1) ! { dg-error "OMP DECLARE REDUCTION iand not found for type REAL" } +!$acc end parallel +!$acc parallel reduction (ior:ra1) ! { dg-error "OMP DECLARE REDUCTION ior not found for type REAL" } +!$acc end parallel +!$acc parallel reduction (ieor:d1) ! { dg-error "OMP DECLARE REDUCTION ieor not found for type REAL" } +!$acc end parallel +!$acc parallel reduction (ior:da1) ! { dg-error "OMP DECLARE REDUCTION ior not found for type REAL" } +!$acc end parallel +!$acc parallel reduction (iand:c1) ! { dg-error "OMP DECLARE REDUCTION iand not found for type COMPLEX" } +!$acc end parallel +!$acc parallel reduction (ior:ca1) ! { dg-error "OMP DECLARE REDUCTION ior not found for type COMPLEX" } +!$acc end parallel +!$acc parallel reduction (ieor:l1) ! { dg-error "OMP DECLARE REDUCTION ieor not found for type LOGICAL" } +!$acc end parallel +!$acc parallel reduction (iand:la1) ! { dg-error "OMP DECLARE REDUCTION iand not found for type LOGICAL" } +!$acc end parallel +!$acc parallel reduction (ior:a1) ! 
{ dg-error "OMP DECLARE REDUCTION ior not found for type CHARACTER" } +!$acc end parallel +!$acc parallel reduction (ieor:t1) ! { dg-error "OMP DECLARE REDUCTION ieor not found for type TYPE" } +!$acc end parallel +!$acc parallel reduction (iand:ta1) ! { dg-error "OMP DECLARE REDUCTION iand not found for type TYPE" } +!$acc end parallel + +end subroutine diff --git a/gcc/testsuite/gfortran.dg/goacc/routine-1.f90 b/gcc/testsuite/gfortran.dg/goacc/routine-1.f90 new file mode 100644 index 00000000000..67c5f11be6a --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/routine-1.f90 @@ -0,0 +1,37 @@ +! { dg-do compile } + + integer, parameter :: n = 10 + integer :: a(n), i + integer, external :: fact + i = 1 + !$acc routine (fact) ! { dg-error "Unexpected \\\!\\\$ACC ROUTINE" } + !$acc routine () ! { dg-error "Syntax error in \\\!\\\$ACC ROUTINE \\\( NAME \\\)" } + !$acc parallel + !$acc loop + do i = 1, n + a(i) = fact (i) + call incr (a(i)) + end do + !$acc end parallel + do i = 1, n + write (*, "(I10)") a(i) + end do +end +recursive function fact (x) result (res) + integer, intent(in) :: x + integer :: res + res = 1 + !$acc routine ! { dg-error "Unexpected \\\!\\\$ACC ROUTINE" } + if (x < 1) then + res = 1 + else + res = x * fact (x - 1) + end if +end function fact +subroutine incr (x) + integer, intent(inout) :: x + integer i + i = 0 + !$acc routine ! { dg-error "Unexpected \\\!\\\$ACC ROUTINE" } + x = x + 1 +end subroutine incr diff --git a/gcc/testsuite/gfortran.dg/goacc/routine-2.f90 b/gcc/testsuite/gfortran.dg/goacc/routine-2.f90 new file mode 100644 index 00000000000..3be33511581 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/routine-2.f90 @@ -0,0 +1,17 @@ +! { dg-do compile } + + module m1 + contains + recursive function mfact (x) result (res) + integer, intent(in) :: x + integer :: res + integer i + i = 0 + !$acc routine ! 
{ dg-error "Unexpected \\\!\\\$ACC ROUTINE" } + if (x < 1) then + res = 1 + else + res = x * mfact (x - 1) + end if + end function mfact + end module m1 diff --git a/gcc/testsuite/gfortran.dg/goacc/sentinel-free-form.f95 b/gcc/testsuite/gfortran.dg/goacc/sentinel-free-form.f95 new file mode 100644 index 00000000000..1a3189cb34e --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/sentinel-free-form.f95 @@ -0,0 +1,21 @@ +! { dg-do compile } + +program test + implicit none + + integer :: i + real :: x + + ! sentinel may only be preceded by white space + x = 0.0 !$acc parallel ! comment + ! sentinel must appear as a single word + ! $acc parallel ! comment + !$ acc parallel ! { dg-error "Unclassifiable statement" } + ! directive lines must have space after sentinel + !$accparallel ! { dg-warning "followed by a space" } + do i = 1,10 + x = x + 0.3 + enddo + !$acc end parallel ! { dg-error "Unexpected" } + print *, x +end \ No newline at end of file diff --git a/gcc/testsuite/gfortran.dg/goacc/several-directives.f95 b/gcc/testsuite/gfortran.dg/goacc/several-directives.f95 new file mode 100644 index 00000000000..8fb97b53d38 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/several-directives.f95 @@ -0,0 +1,6 @@ +! { dg-do compile } + +program test + ! only one directive-name may appear in directive + !$acc parallel kernels ! { dg-error "Unclassifiable OpenACC directive" } +end \ No newline at end of file diff --git a/gcc/testsuite/gfortran.dg/goacc/sie.f95 b/gcc/testsuite/gfortran.dg/goacc/sie.f95 new file mode 100644 index 00000000000..2d66026b4c5 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/sie.f95 @@ -0,0 +1,252 @@ +! { dg-do compile } +! { dg-additional-options "-fmax-errors=100" } + +! 
tests async, num_gangs, num_workers, vector_length, gang, worker, vector clauses + +program test + implicit none + + integer :: i + + !$acc parallel async + !$acc end parallel + + !$acc parallel async(3) + !$acc end parallel + + !$acc parallel async(i) + !$acc end parallel + + !$acc parallel async(i+1) + !$acc end parallel + + !$acc parallel async(-1) + !$acc end parallel + + !$acc parallel async(0) + !$acc end parallel + + !$acc parallel async() ! { dg-error "Invalid character in name" } + + !$acc parallel async(1.5) ! { dg-error "scalar INTEGER expression" } + !$acc end parallel + + !$acc parallel async(.true.) ! { dg-error "scalar INTEGER expression" } + !$acc end parallel + + !$acc parallel async("1") ! { dg-error "scalar INTEGER expression" } + !$acc end parallel + + !$acc kernels async + !$acc end kernels + + !$acc kernels async(3) + !$acc end kernels + + !$acc kernels async(i) + !$acc end kernels + + !$acc kernels async(i+1) + !$acc end kernels + + !$acc kernels async(-1) + !$acc end kernels + + !$acc kernels async(0) + !$acc end kernels + + !$acc kernels async() ! { dg-error "Invalid character in name" } + + !$acc kernels async(1.5) ! { dg-error "scalar INTEGER expression" } + !$acc end kernels + + !$acc kernels async(.true.) ! { dg-error "scalar INTEGER expression" } + !$acc end kernels + + !$acc kernels async("1") ! { dg-error "scalar INTEGER expression" } + !$acc end kernels + + + !$acc parallel num_gangs ! { dg-error "Unclassifiable OpenACC directive" } + + !$acc parallel num_gangs(3) + !$acc end parallel + + !$acc parallel num_gangs(i) + !$acc end parallel + + !$acc parallel num_gangs(i+1) + !$acc end parallel + + !$acc parallel num_gangs(-1) ! { dg-warning "must be positive" } + !$acc end parallel + + !$acc parallel num_gangs(0) ! { dg-warning "must be positive" } + !$acc end parallel + + !$acc parallel num_gangs() ! { dg-error "Invalid character in name" } + + !$acc parallel num_gangs(1.5) ! 
{ dg-error "scalar INTEGER expression" } + !$acc end parallel + + !$acc parallel num_gangs(.true.) ! { dg-error "scalar INTEGER expression" } + !$acc end parallel + + !$acc parallel num_gangs("1") ! { dg-error "scalar INTEGER expression" } + !$acc end parallel + + + !$acc parallel num_workers ! { dg-error "Unclassifiable OpenACC directive" } + + !$acc parallel num_workers(3) + !$acc end parallel + + !$acc parallel num_workers(i) + !$acc end parallel + + !$acc parallel num_workers(i+1) + !$acc end parallel + + !$acc parallel num_workers(-1) ! { dg-warning "must be positive" } + !$acc end parallel + + !$acc parallel num_workers(0) ! { dg-warning "must be positive" } + !$acc end parallel + + !$acc parallel num_workers() ! { dg-error "Invalid character in name" } + + !$acc parallel num_workers(1.5) ! { dg-error "scalar INTEGER expression" } + !$acc end parallel + + !$acc parallel num_workers(.true.) ! { dg-error "scalar INTEGER expression" } + !$acc end parallel + + !$acc parallel num_workers("1") ! { dg-error "scalar INTEGER expression" } + !$acc end parallel + + + !$acc parallel vector_length ! { dg-error "Unclassifiable OpenACC directive" } + + !$acc parallel vector_length(3) + !$acc end parallel + + !$acc parallel vector_length(i) + !$acc end parallel + + !$acc parallel vector_length(i+1) + !$acc end parallel + + !$acc parallel vector_length(-1) ! { dg-warning "must be positive" } + !$acc end parallel + + !$acc parallel vector_length(0) ! { dg-warning "must be positive" } + !$acc end parallel + + !$acc parallel vector_length() ! { dg-error "Invalid character in name" } + + !$acc parallel vector_length(1.5) ! { dg-error "scalar INTEGER expression" } + !$acc end parallel + + !$acc parallel vector_length(.true.) ! { dg-error "scalar INTEGER expression" } + !$acc end parallel + + !$acc parallel vector_length("1") ! 
{ dg-error "scalar INTEGER expression" } + !$acc end parallel + + + !$acc loop gang + do i = 1,10 + enddo + !$acc loop gang(3) + do i = 1,10 + enddo + !$acc loop gang(i) + do i = 1,10 + enddo + !$acc loop gang(i+1) + do i = 1,10 + enddo + !$acc loop gang(-1) ! { dg-warning "must be positive" } + do i = 1,10 + enddo + !$acc loop gang(0) ! { dg-warning "must be positive" } + do i = 1,10 + enddo + !$acc loop gang() ! { dg-error "Invalid character in name" } + do i = 1,10 + enddo + !$acc loop gang(1.5) ! { dg-error "scalar INTEGER expression" } + do i = 1,10 + enddo + !$acc loop gang(.true.) ! { dg-error "scalar INTEGER expression" } + do i = 1,10 + enddo + !$acc loop gang("1") ! { dg-error "scalar INTEGER expression" } + do i = 1,10 + enddo + + + !$acc loop worker + do i = 1,10 + enddo + !$acc loop worker(3) + do i = 1,10 + enddo + !$acc loop worker(i) + do i = 1,10 + enddo + !$acc loop worker(i+1) + do i = 1,10 + enddo + !$acc loop worker(-1) ! { dg-warning "must be positive" } + do i = 1,10 + enddo + !$acc loop worker(0) ! { dg-warning "must be positive" } + do i = 1,10 + enddo + !$acc loop worker() ! { dg-error "Invalid character in name" } + do i = 1,10 + enddo + !$acc loop worker(1.5) ! { dg-error "scalar INTEGER expression" } + do i = 1,10 + enddo + !$acc loop worker(.true.) ! { dg-error "scalar INTEGER expression" } + do i = 1,10 + enddo + !$acc loop worker("1") ! { dg-error "scalar INTEGER expression" } + do i = 1,10 + enddo + + + !$acc loop vector + do i = 1,10 + enddo + !$acc loop vector(3) + do i = 1,10 + enddo + !$acc loop vector(i) + do i = 1,10 + enddo + !$acc loop vector(i+1) + do i = 1,10 + enddo + !$acc loop vector(-1) ! { dg-warning "must be positive" } + do i = 1,10 + enddo + !$acc loop vector(0) ! { dg-warning "must be positive" } + do i = 1,10 + enddo + !$acc loop vector() ! { dg-error "Invalid character in name" } + do i = 1,10 + enddo + !$acc loop vector(1.5) ! 
{ dg-error "scalar INTEGER expression" } + do i = 1,10 + enddo + !$acc loop vector(.true.) ! { dg-error "scalar INTEGER expression" } + do i = 1,10 + enddo + !$acc loop vector("1") ! { dg-error "scalar INTEGER expression" } + do i = 1,10 + enddo + +end program test \ No newline at end of file diff --git a/gcc/testsuite/gfortran.dg/goacc/subarrays.f95 b/gcc/testsuite/gfortran.dg/goacc/subarrays.f95 new file mode 100644 index 00000000000..4b3ef42d175 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/subarrays.f95 @@ -0,0 +1,41 @@ +! { dg-do compile } +program test + implicit none + integer :: a(10), b(10, 10), c(3:7), i + + !$acc parallel copy(a(1:5)) + !$acc end parallel + !$acc parallel copy(a(1 + 0 : 5 + 2)) + !$acc end parallel + !$acc parallel copy(a(:3)) + !$acc end parallel + !$acc parallel copy(a(3:)) + !$acc end parallel + !$acc parallel copy(a(:)) + !$acc end parallel + !$acc parallel copy(a(2:3,2:3)) + ! { dg-error "Rank mismatch" "" { target *-*-* } 16 } + ! { dg-error "'a' in MAP clause" "" { target *-*-* } 16 } + !$acc end parallel + !$acc parallel copy (a(:11)) ! { dg-warning "Upper array reference" } + !$acc end parallel + !$acc parallel copy (a(i:)) + !$acc end parallel + + !$acc parallel copy (a(:b)) + ! { dg-error "Array index" "" { target *-*-* } 25 } + ! { dg-error "'a' in MAP clause" "" { target *-*-* } 25 } + !$acc end parallel + + !$acc parallel copy (b(1:3,2:4)) + !$acc end parallel + !$acc parallel copy (b(2:3)) + ! { dg-error "Rank mismatch" "" { target *-*-* } 32 } + ! { dg-error "'b' in MAP clause" "" { target *-*-* } 32 } + !$acc end parallel + !$acc parallel copy (b(1:, 4:6)) + !$acc end parallel + + !$acc parallel copy (c(2:)) ! 
{ dg-warning "Lower array reference" } + !$acc end parallel +end program test diff --git a/gcc/testsuite/gfortran.dg/gomp/map-1.f90 b/gcc/testsuite/gfortran.dg/gomp/map-1.f90 new file mode 100644 index 00000000000..e4b8b862afd --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/map-1.f90 @@ -0,0 +1,110 @@ +subroutine test(aas) + implicit none + + integer :: i, j(10), k(10, 10), aas(*) + integer, save :: tp + !$omp threadprivate(tp) + integer, parameter :: p = 1 + + type t + integer :: i, j(10) + end type t + + type(t) :: tt + + !$omp target map(i) + !$omp end target + + !$omp target map(j) + !$omp end target + + !$omp target map(p) ! { dg-error "Object 'p' is not a variable" } + !$omp end target + + !$omp target map(j(1)) + !$omp end target + + !$omp target map(j(i)) + !$omp end target + + !$omp target map(j(i:)) + !$omp end target + + !$omp target map(j(:i)) + !$omp end target + + !$omp target map(j(i:i+1)) + !$omp end target + + !$omp target map(j(11)) ! { dg-warning "out of bounds" } + !$omp end target + + !$omp target map(j(:11)) ! { dg-warning "out of bounds" } + !$omp end target + + !$omp target map(j(0:)) ! { dg-warning "out of bounds" } + !$omp end target + + !$omp target map(j(5:4)) + !$omp end target + + !$omp target map(j(5:)) + !$omp end target + + !$omp target map(j(:5)) + !$omp end target + + !$omp target map(j(:)) + !$omp end target + + !$omp target map(j(1:9:2)) ! { dg-error "Stride should not be specified for array section in MAP clause" } + !$omp end target + + !$omp target map(aas(5:)) + !$omp end target + ! { dg-error "Rightmost upper bound of assumed size array section not specified" "" { target *-*-* } 63 } + ! { dg-error "'aas' in MAP clause at \\\(1\\\) is not a proper array section" "" { target *-*-* } 63 } + + !$omp target map(aas(:)) + !$omp end target + ! { dg-error "Rightmost upper bound of assumed size array section not specified" "" { target *-*-* } 68 } + ! 
{ dg-error "'aas' in MAP clause at \\\(1\\\) is not a proper array section" "" { target *-*-* } 68 } + + !$omp target map(aas) ! { dg-error "The upper bound in the last dimension must appear" "" { xfail *-*-* } } + !$omp end target + + !$omp target map(aas(5:7)) + !$omp end target + + !$omp target map(aas(:7)) + !$omp end target + + !$omp target map(k(5:)) + !$omp end target + ! { dg-error "Rank mismatch in array reference" "" { target *-*-* } 82 } + ! { dg-error "'k' in MAP clause at \\\(1\\\) is not a proper array section" "" { target *-*-* } 82 } + + !$omp target map(k(5:,:,3)) + !$omp end target + ! { dg-error "Rank mismatch in array reference" "" { target *-*-* } 87 } + ! { dg-error "'k' in MAP clause at \\\(1\\\) is not a proper array section" "" { target *-*-* } 87 } + + !$omp target map(tt) + !$omp end target + + !$omp target map(tt%i) ! { dg-error "Syntax error in OpenMP variable list" } + !$omp end target ! { dg-error "Unexpected !\\\$OMP END TARGET statement" } + + !$omp target map(tt%j) ! { dg-error "Syntax error in OpenMP variable list" } + !$omp end target ! { dg-error "Unexpected !\\\$OMP END TARGET statement" } + + ! broken test + !$omp target map(tt%j(1)) ! { dg-error "Syntax error in OpenMP variable list" } + !$omp end target ! { dg-error "Unexpected !\\\$OMP END TARGET statement" } + + !$omp target map(tt%j(1:)) ! { dg-error "Syntax error in OpenMP variable list" } + !$omp end target ! { dg-error "Unexpected !\\\$OMP END TARGET statement" } + + !$omp target map(tp) ! { dg-error "THREADPRIVATE object 'tp' in MAP clause" } + !$omp end target +end subroutine test diff --git a/gcc/testsuite/gfortran.dg/openacc-define-1.f90 b/gcc/testsuite/gfortran.dg/openacc-define-1.f90 new file mode 100644 index 00000000000..42f40734a1e --- /dev/null +++ b/gcc/testsuite/gfortran.dg/openacc-define-1.f90 @@ -0,0 +1,7 @@ +! { dg-options "-cpp" } +! { dg-do preprocess } +! 
{ dg-require-effective-target fopenacc } + +#ifdef _OPENACC +# error _OPENACC defined +#endif diff --git a/gcc/testsuite/gfortran.dg/openacc-define-2.f90 b/gcc/testsuite/gfortran.dg/openacc-define-2.f90 new file mode 100644 index 00000000000..8ad1bd55392 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/openacc-define-2.f90 @@ -0,0 +1,7 @@ +! { dg-options "-cpp -fno-openacc" } +! { dg-do preprocess } +! { dg-require-effective-target fopenacc } + +#ifdef _OPENACC +# error _OPENACC defined +#endif diff --git a/gcc/testsuite/gfortran.dg/openacc-define-3.f90 b/gcc/testsuite/gfortran.dg/openacc-define-3.f90 new file mode 100644 index 00000000000..b6c296e6b98 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/openacc-define-3.f90 @@ -0,0 +1,11 @@ +! { dg-options "-cpp -fopenacc" } +! { dg-do preprocess } +! { dg-require-effective-target fopenacc } + +#ifndef _OPENACC +# error _OPENACC not defined +#endif + +#if _OPENACC != 201306 +# error _OPENACC defined to wrong value +#endif diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 00872ab6520..e51d07d8d7c 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -788,6 +788,15 @@ proc check_effective_target_fgraphite {} { } "-O1 -fgraphite"] } +# Return 1 if compilation with -fopenacc is error-free for trivial +# code, 0 otherwise. + +proc check_effective_target_fopenacc {} { + return [check_no_compiler_messages fopenacc object { + void foo (void) { } + } "-fopenacc"] +} + # Return 1 if compilation with -fopenmp is error-free for trivial # code, 0 otherwise. diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 41f99d1a6fe..735ce5cf307 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -208,19 +208,19 @@ enum omp_clause_code { (c_parser_omp_variable_list). */ OMP_CLAUSE_ERROR = 0, - /* OpenMP clause: private (variable_list). */ + /* OpenACC/OpenMP clause: private (variable_list). */ OMP_CLAUSE_PRIVATE, /* OpenMP clause: shared (variable_list). 
*/ OMP_CLAUSE_SHARED, - /* OpenMP clause: firstprivate (variable_list). */ + /* OpenACC/OpenMP clause: firstprivate (variable_list). */ OMP_CLAUSE_FIRSTPRIVATE, /* OpenMP clause: lastprivate (variable_list). */ OMP_CLAUSE_LASTPRIVATE, - /* OpenMP clause: reduction (operator:variable_list). + /* OpenACC/OpenMP clause: reduction (operator:variable_list). OMP_CLAUSE_REDUCTION_CODE: The tree_code of the operator. Operand 1: OMP_CLAUSE_REDUCTION_INIT: Stmt-list to initialize the var. Operand 2: OMP_CLAUSE_REDUCTION_MERGE: Stmt-list to merge private var @@ -253,13 +253,48 @@ enum omp_clause_code { /* OpenMP clause: to (variable-list). */ OMP_CLAUSE_TO, - /* OpenMP clause: map ({alloc:,to:,from:,tofrom:,}variable-list). */ + /* OpenACC clauses: {copy, copyin, copyout, create, delete, deviceptr, + device, host (self), present, present_or_copy (pcopy), present_or_copyin + (pcopyin), present_or_copyout (pcopyout), present_or_create (pcreate)} + (variable-list). + + OpenMP clause: map ({alloc:,to:,from:,tofrom:,}variable-list). */ OMP_CLAUSE_MAP, + /* Internal structure to hold OpenACC cache directive's variable-list. + #pragma acc cache (variable-list). */ + OMP_CLAUSE__CACHE_, + + /* OpenACC clause: device_resident (variable_list). */ + OMP_CLAUSE_DEVICE_RESIDENT, + + /* OpenACC clause: use_device (variable_list). */ + OMP_CLAUSE_USE_DEVICE, + + /* OpenACC clause: gang [(gang-argument-list)]. + Where + gang-argument-list: [gang-argument-list, ] gang-argument + gang-argument: [num:] integer-expression + | static: size-expression + size-expression: * | integer-expression. */ + OMP_CLAUSE_GANG, + + /* OpenACC clause: async [(integer-expression)]. */ + OMP_CLAUSE_ASYNC, + + /* OpenACC clause: wait [(integer-expression-list)]. */ + OMP_CLAUSE_WAIT, + + /* OpenACC clause: auto. */ + OMP_CLAUSE_AUTO, + + /* OpenACC clause: seq. */ + OMP_CLAUSE_SEQ, + /* Internal clause: temporary for combined loops expansion. */ OMP_CLAUSE__LOOPTEMP_, - /* OpenMP clause: if (scalar-expression). 
*/ + /* OpenACC/OpenMP clause: if (scalar-expression). */ OMP_CLAUSE_IF, /* OpenMP clause: num_threads (integer-expression). */ @@ -277,7 +312,7 @@ enum omp_clause_code { /* OpenMP clause: default. */ OMP_CLAUSE_DEFAULT, - /* OpenMP clause: collapse (constant-integer-expression). */ + /* OpenACC/OpenMP clause: collapse (constant-integer-expression). */ OMP_CLAUSE_COLLAPSE, /* OpenMP clause: untied. */ @@ -333,7 +368,25 @@ enum omp_clause_code { /* Internally used only clause, holding _Cilk_for # of iterations on OMP_PARALLEL. */ - OMP_CLAUSE__CILK_FOR_COUNT_ + OMP_CLAUSE__CILK_FOR_COUNT_, + + /* OpenACC clause: independent. */ + OMP_CLAUSE_INDEPENDENT, + + /* OpenACC clause: worker [( [num:] integer-expression)]. */ + OMP_CLAUSE_WORKER, + + /* OpenACC clause: vector [( [length:] integer-expression)]. */ + OMP_CLAUSE_VECTOR, + + /* OpenACC clause: num_gangs (integer-expression). */ + OMP_CLAUSE_NUM_GANGS, + + /* OpenACC clause: num_workers (integer-expression). */ + OMP_CLAUSE_NUM_WORKERS, + + /* OpenACC clause: vector_length (integer-expression). */ + OMP_CLAUSE_VECTOR_LENGTH }; #undef DEFTREESTRUCT @@ -1172,24 +1225,6 @@ enum omp_clause_depend_kind OMP_CLAUSE_DEPEND_LAST }; -enum omp_clause_map_kind -{ - OMP_CLAUSE_MAP_ALLOC, - OMP_CLAUSE_MAP_TO, - OMP_CLAUSE_MAP_FROM, - OMP_CLAUSE_MAP_TOFROM, - /* The following kind is an internal only map kind, used for pointer based - array sections. OMP_CLAUSE_SIZE for these is not the pointer size, - which is implicitly POINTER_SIZE_UNITS, but the bias. */ - OMP_CLAUSE_MAP_POINTER, - /* Also internal, behaves like OMP_CLAUS_MAP_TO, but additionally any - OMP_CLAUSE_MAP_POINTER records consecutive after it which have addresses - falling into that range will not be ignored if OMP_CLAUSE_MAP_TO_PSET - wasn't mapped already. */ - OMP_CLAUSE_MAP_TO_PSET, - OMP_CLAUSE_MAP_LAST -}; - enum omp_clause_proc_bind_kind { /* Numbers should match omp_proc_bind_t enum in omp.h. 
*/ @@ -1261,7 +1296,8 @@ struct GTY(()) tree_omp_clause { enum omp_clause_default_kind default_kind; enum omp_clause_schedule_kind schedule_kind; enum omp_clause_depend_kind depend_kind; - enum omp_clause_map_kind map_kind; + /* See include/gomp-constants.h for enum gomp_map_kind's values. */ + unsigned char map_kind; enum omp_clause_proc_bind_kind proc_bind_kind; enum tree_code reduction_code; } GTY ((skip)) subcode; diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index ac16e24e2ed..5443ab5bc32 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -4194,7 +4194,7 @@ estimate_num_insns (gimple stmt, eni_weights *weights) return (estimate_num_insns_seq (gimple_try_eval (stmt), weights) + estimate_num_insns_seq (gimple_try_cleanup (stmt), weights)); - /* OpenMP directives are generally very expensive. */ + /* OMP directives are generally very expensive. */ case GIMPLE_OMP_RETURN: case GIMPLE_OMP_SECTIONS_SWITCH: diff --git a/gcc/tree-nested.c b/gcc/tree-nested.c index 526e6175577..f2e6d3acfd7 100644 --- a/gcc/tree-nested.c +++ b/gcc/tree-nested.c @@ -73,6 +73,7 @@ #include "expr.h" /* FIXME: For STACK_SAVEAREA_MODE and SAVE_NONLOCAL. */ #include "langhooks.h" #include "gimple-low.h" +#include "gomp-constants.h" /* The object of this pass is to lower the representation of a set of nested @@ -850,7 +851,7 @@ static void note_nonlocal_vla_type (struct nesting_info *info, tree type); /* A subroutine of convert_nonlocal_reference_op. Create a local variable in the nested function with DECL_VALUE_EXPR set to reference the true variable in the parent function. This is used both for debug info - and in OpenMP lowering. */ + and in OMP lowering. 
*/ static tree get_nonlocal_debug_decl (struct nesting_info *info, tree decl) @@ -1399,7 +1400,7 @@ convert_nonlocal_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, break; case GIMPLE_OMP_TARGET: - if (gimple_omp_target_kind (stmt) != GF_OMP_TARGET_KIND_REGION) + if (!is_gimple_omp_offloaded (stmt)) { save_suppress = info->suppress_expansion; convert_nonlocal_omp_clauses (gimple_omp_target_clauses_ptr (stmt), @@ -1418,7 +1419,7 @@ convert_nonlocal_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, decl = get_chain_decl (info); c = build_omp_clause (gimple_location (stmt), OMP_CLAUSE_MAP); OMP_CLAUSE_DECL (c) = decl; - OMP_CLAUSE_MAP_KIND (c) = OMP_CLAUSE_MAP_TO; + OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_TO); OMP_CLAUSE_SIZE (c) = DECL_SIZE_UNIT (decl); OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (stmt); gimple_omp_target_set_clauses (as_a (stmt), c); @@ -1503,7 +1504,7 @@ convert_nonlocal_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, /* A subroutine of convert_local_reference. Create a local variable in the parent function with DECL_VALUE_EXPR set to reference the - field in FRAME. This is used both for debug info and in OpenMP + field in FRAME. This is used both for debug info and in OMP lowering. 
*/ static tree @@ -1968,7 +1969,7 @@ convert_local_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, break; case GIMPLE_OMP_TARGET: - if (gimple_omp_target_kind (stmt) != GF_OMP_TARGET_KIND_REGION) + if (!is_gimple_omp_offloaded (stmt)) { save_suppress = info->suppress_expansion; convert_local_omp_clauses (gimple_omp_target_clauses_ptr (stmt), wi); @@ -1984,7 +1985,7 @@ convert_local_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, (void) get_frame_type (info); c = build_omp_clause (gimple_location (stmt), OMP_CLAUSE_MAP); OMP_CLAUSE_DECL (c) = info->frame_decl; - OMP_CLAUSE_MAP_KIND (c) = OMP_CLAUSE_MAP_TOFROM; + OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_TOFROM); OMP_CLAUSE_SIZE (c) = DECL_SIZE_UNIT (info->frame_decl); OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (stmt); gimple_omp_target_set_clauses (as_a (stmt), c); @@ -2301,7 +2302,7 @@ convert_tramp_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, } case GIMPLE_OMP_TARGET: - if (gimple_omp_target_kind (stmt) != GF_OMP_TARGET_KIND_REGION) + if (!is_gimple_omp_offloaded (stmt)) { *handled_ops_p = false; return NULL_TREE; @@ -2400,7 +2401,7 @@ convert_gimple_call (gimple_stmt_iterator *gsi, bool *handled_ops_p, break; case GIMPLE_OMP_TARGET: - if (gimple_omp_target_kind (stmt) != GF_OMP_TARGET_KIND_REGION) + if (!is_gimple_omp_offloaded (stmt)) { walk_body (convert_gimple_call, NULL, info, gimple_omp_body_ptr (stmt)); break; @@ -2425,8 +2426,7 @@ convert_gimple_call (gimple_stmt_iterator *gsi, bool *handled_ops_p, { c = build_omp_clause (gimple_location (stmt), OMP_CLAUSE_MAP); OMP_CLAUSE_DECL (c) = decl; - OMP_CLAUSE_MAP_KIND (c) - = i ? OMP_CLAUSE_MAP_TO : OMP_CLAUSE_MAP_TOFROM; + OMP_CLAUSE_SET_MAP_KIND (c, i ? 
GOMP_MAP_TO : GOMP_MAP_TOFROM); OMP_CLAUSE_SIZE (c) = DECL_SIZE_UNIT (decl); OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (stmt); gimple_omp_target_set_clauses (as_a (stmt), diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 5773eba5843..d7c049f1f44 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -65,6 +65,7 @@ along with GCC; see the file COPYING3. If not see #include "value-prof.h" #include "wide-int-print.h" #include "internal-fn.h" +#include "gomp-constants.h" /* Local functions, macros and variables. */ static const char *op_symbol (const_tree); @@ -350,6 +351,12 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, int flags) case OMP_CLAUSE__LOOPTEMP_: name = "_looptemp_"; goto print_remap; + case OMP_CLAUSE_DEVICE_RESIDENT: + name = "device_resident"; + goto print_remap; + case OMP_CLAUSE_USE_DEVICE: + name = "use_device"; + goto print_remap; print_remap: pp_string (pp, name); pp_left_paren (pp); @@ -528,20 +535,41 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, int flags) pp_string (pp, "map("); switch (OMP_CLAUSE_MAP_KIND (clause)) { - case OMP_CLAUSE_MAP_ALLOC: - case OMP_CLAUSE_MAP_POINTER: + case GOMP_MAP_ALLOC: + case GOMP_MAP_POINTER: pp_string (pp, "alloc"); break; - case OMP_CLAUSE_MAP_TO: - case OMP_CLAUSE_MAP_TO_PSET: + case GOMP_MAP_TO: + case GOMP_MAP_TO_PSET: pp_string (pp, "to"); break; - case OMP_CLAUSE_MAP_FROM: + case GOMP_MAP_FROM: pp_string (pp, "from"); break; - case OMP_CLAUSE_MAP_TOFROM: + case GOMP_MAP_TOFROM: pp_string (pp, "tofrom"); break; + case GOMP_MAP_FORCE_ALLOC: + pp_string (pp, "force_alloc"); + break; + case GOMP_MAP_FORCE_TO: + pp_string (pp, "force_to"); + break; + case GOMP_MAP_FORCE_FROM: + pp_string (pp, "force_from"); + break; + case GOMP_MAP_FORCE_TOFROM: + pp_string (pp, "force_tofrom"); + break; + case GOMP_MAP_FORCE_PRESENT: + pp_string (pp, "force_present"); + break; + case GOMP_MAP_FORCE_DEALLOC: + pp_string (pp, "force_dealloc"); + break; + case 
GOMP_MAP_FORCE_DEVICEPTR: + pp_string (pp, "force_deviceptr"); + break; default: gcc_unreachable (); } @@ -552,10 +580,10 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, int flags) if (OMP_CLAUSE_SIZE (clause)) { if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (clause) == OMP_CLAUSE_MAP_POINTER) + && OMP_CLAUSE_MAP_KIND (clause) == GOMP_MAP_POINTER) pp_string (pp, " [pointer assign, bias: "); else if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (clause) == OMP_CLAUSE_MAP_TO_PSET) + && OMP_CLAUSE_MAP_KIND (clause) == GOMP_MAP_TO_PSET) pp_string (pp, " [pointer set, len: "); else pp_string (pp, " [len: "); @@ -578,6 +606,12 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, int flags) spc, flags, false); goto print_clause_size; + case OMP_CLAUSE__CACHE_: + pp_string (pp, "("); + dump_generic_node (pp, OMP_CLAUSE_DECL (clause), + spc, flags, false); + goto print_clause_size; + case OMP_CLAUSE_NUM_TEAMS: pp_string (pp, "num_teams("); dump_generic_node (pp, OMP_CLAUSE_NUM_TEAMS_EXPR (clause), @@ -651,6 +685,99 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, int flags) pp_right_paren (pp); break; + case OMP_CLAUSE_GANG: + pp_string (pp, "gang"); + if (OMP_CLAUSE_GANG_EXPR (clause) != NULL_TREE) + { + pp_string (pp, "(num: "); + dump_generic_node (pp, OMP_CLAUSE_GANG_EXPR (clause), + spc, flags, false); + } + if (OMP_CLAUSE_GANG_STATIC_EXPR (clause) != NULL_TREE) + { + if (OMP_CLAUSE_GANG_EXPR (clause) == NULL_TREE) + pp_left_paren (pp); + else + pp_space (pp); + pp_string (pp, "static:"); + if (OMP_CLAUSE_GANG_STATIC_EXPR (clause) + == integer_minus_one_node) + pp_character (pp, '*'); + else + dump_generic_node (pp, OMP_CLAUSE_GANG_STATIC_EXPR (clause), + spc, flags, false); + } + if (OMP_CLAUSE_GANG_EXPR (clause) != NULL_TREE + || OMP_CLAUSE_GANG_STATIC_EXPR (clause) != NULL_TREE) + pp_right_paren (pp); + break; + + case OMP_CLAUSE_ASYNC: + pp_string (pp, "async"); + if 
(OMP_CLAUSE_ASYNC_EXPR (clause)) + { + pp_character(pp, '('); + dump_generic_node (pp, OMP_CLAUSE_ASYNC_EXPR (clause), + spc, flags, false); + pp_character(pp, ')'); + } + break; + + case OMP_CLAUSE_AUTO: + case OMP_CLAUSE_SEQ: + pp_string (pp, omp_clause_code_name[OMP_CLAUSE_CODE (clause)]); + break; + + case OMP_CLAUSE_WAIT: + pp_string (pp, "wait("); + dump_generic_node (pp, OMP_CLAUSE_WAIT_EXPR (clause), + spc, flags, false); + pp_character(pp, ')'); + break; + + case OMP_CLAUSE_WORKER: + pp_string (pp, "worker"); + if (OMP_CLAUSE_WORKER_EXPR (clause) != NULL_TREE) + { + pp_left_paren (pp); + dump_generic_node (pp, OMP_CLAUSE_WORKER_EXPR (clause), + spc, flags, false); + pp_right_paren (pp); + } + break; + + case OMP_CLAUSE_VECTOR: + pp_string (pp, "vector"); + if (OMP_CLAUSE_VECTOR_EXPR (clause) != NULL_TREE) + { + pp_left_paren (pp); + dump_generic_node (pp, OMP_CLAUSE_VECTOR_EXPR (clause), + spc, flags, false); + pp_right_paren (pp); + } + break; + + case OMP_CLAUSE_NUM_GANGS: + pp_string (pp, "num_gangs("); + dump_generic_node (pp, OMP_CLAUSE_NUM_GANGS_EXPR (clause), + spc, flags, false); + pp_character (pp, ')'); + break; + + case OMP_CLAUSE_NUM_WORKERS: + pp_string (pp, "num_workers("); + dump_generic_node (pp, OMP_CLAUSE_NUM_WORKERS_EXPR (clause), + spc, flags, false); + pp_character (pp, ')'); + break; + + case OMP_CLAUSE_VECTOR_LENGTH: + pp_string (pp, "vector_length("); + dump_generic_node (pp, OMP_CLAUSE_VECTOR_LENGTH_EXPR (clause), + spc, flags, false); + pp_character (pp, ')'); + break; + case OMP_CLAUSE_INBRANCH: pp_string (pp, "inbranch"); break; @@ -669,6 +796,9 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, int flags) case OMP_CLAUSE_TASKGROUP: pp_string (pp, "taskgroup"); break; + case OMP_CLAUSE_INDEPENDENT: + pp_string (pp, "independent"); + break; default: /* Should never happen. 
*/ @@ -2433,6 +2563,51 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, int flags, pp_string (pp, " > "); break; + case OACC_PARALLEL: + pp_string (pp, "#pragma acc parallel"); + dump_omp_clauses (pp, OACC_PARALLEL_CLAUSES (node), spc, flags); + goto dump_omp_body; + + case OACC_KERNELS: + pp_string (pp, "#pragma acc kernels"); + dump_omp_clauses (pp, OACC_KERNELS_CLAUSES (node), spc, flags); + goto dump_omp_body; + + case OACC_DATA: + pp_string (pp, "#pragma acc data"); + dump_omp_clauses (pp, OACC_DATA_CLAUSES (node), spc, flags); + goto dump_omp_body; + + case OACC_HOST_DATA: + pp_string (pp, "#pragma acc host_data"); + dump_omp_clauses (pp, OACC_HOST_DATA_CLAUSES (node), spc, flags); + goto dump_omp_body; + + case OACC_DECLARE: + pp_string (pp, "#pragma acc declare"); + dump_omp_clauses (pp, OACC_DECLARE_CLAUSES (node), spc, flags); + break; + + case OACC_UPDATE: + pp_string (pp, "#pragma acc update"); + dump_omp_clauses (pp, OACC_UPDATE_CLAUSES (node), spc, flags); + break; + + case OACC_ENTER_DATA: + pp_string (pp, "#pragma acc enter data"); + dump_omp_clauses (pp, OACC_ENTER_DATA_CLAUSES (node), spc, flags); + break; + + case OACC_EXIT_DATA: + pp_string (pp, "#pragma acc exit data"); + dump_omp_clauses (pp, OACC_EXIT_DATA_CLAUSES (node), spc, flags); + break; + + case OACC_CACHE: + pp_string (pp, "#pragma acc cache"); + dump_omp_clauses (pp, OACC_CACHE_CLAUSES (node), spc, flags); + break; + case OMP_PARALLEL: pp_string (pp, "#pragma omp parallel"); dump_omp_clauses (pp, OMP_PARALLEL_CLAUSES (node), spc, flags); @@ -2477,6 +2652,10 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, int flags, pp_string (pp, "#pragma omp distribute"); goto dump_omp_loop; + case OACC_LOOP: + pp_string (pp, "#pragma acc loop"); + goto dump_omp_loop; + case OMP_TEAMS: pp_string (pp, "#pragma omp teams"); dump_omp_clauses (pp, OMP_TEAMS_CLAUSES (node), spc, flags); diff --git a/gcc/tree-streamer-in.c b/gcc/tree-streamer-in.c index c43685259de..67d33ed1fbc 
100644 --- a/gcc/tree-streamer-in.c +++ b/gcc/tree-streamer-in.c @@ -59,6 +59,8 @@ along with GCC; see the file COPYING3. If not see #include "lto-streamer.h" #include "builtins.h" #include "ipa-chkp.h" +#include "gomp-constants.h" + /* Read a STRING_CST from the string table in DATA_IN using input block IB. */ @@ -435,8 +437,8 @@ unpack_ts_omp_clause_value_fields (struct data_in *data_in, = bp_unpack_enum (bp, omp_clause_depend_kind, OMP_CLAUSE_DEPEND_LAST); break; case OMP_CLAUSE_MAP: - OMP_CLAUSE_MAP_KIND (expr) - = bp_unpack_enum (bp, omp_clause_map_kind, OMP_CLAUSE_MAP_LAST); + OMP_CLAUSE_SET_MAP_KIND (expr, bp_unpack_enum (bp, gomp_map_kind, + GOMP_MAP_LAST)); break; case OMP_CLAUSE_PROC_BIND: OMP_CLAUSE_PROC_BIND_KIND (expr) diff --git a/gcc/tree-streamer-out.c b/gcc/tree-streamer-out.c index 480fb1e272c..3669680ae93 100644 --- a/gcc/tree-streamer-out.c +++ b/gcc/tree-streamer-out.c @@ -55,6 +55,8 @@ along with GCC; see the file COPYING3. If not see #include "tree-streamer.h" #include "data-streamer.h" #include "streamer-hooks.h" +#include "gomp-constants.h" + /* Output the STRING constant to the string table in OB. Then put the index onto the INDEX_STREAM. 
*/ @@ -395,7 +397,7 @@ pack_ts_omp_clause_value_fields (struct output_block *ob, OMP_CLAUSE_DEPEND_KIND (expr)); break; case OMP_CLAUSE_MAP: - bp_pack_enum (bp, omp_clause_map_kind, OMP_CLAUSE_MAP_LAST, + bp_pack_enum (bp, gomp_map_kind, GOMP_MAP_LAST, OMP_CLAUSE_MAP_KIND (expr)); break; case OMP_CLAUSE_PROC_BIND: diff --git a/gcc/tree.c b/gcc/tree.c index f9818b578af..e1d069d8dc0 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -329,6 +329,14 @@ unsigned const char omp_clause_num_ops[] = 2, /* OMP_CLAUSE_FROM */ 2, /* OMP_CLAUSE_TO */ 2, /* OMP_CLAUSE_MAP */ + 2, /* OMP_CLAUSE__CACHE_ */ + 1, /* OMP_CLAUSE_DEVICE_RESIDENT */ + 1, /* OMP_CLAUSE_USE_DEVICE */ + 2, /* OMP_CLAUSE_GANG */ + 1, /* OMP_CLAUSE_ASYNC */ + 1, /* OMP_CLAUSE_WAIT */ + 0, /* OMP_CLAUSE_AUTO */ + 0, /* OMP_CLAUSE_SEQ */ 1, /* OMP_CLAUSE__LOOPTEMP_ */ 1, /* OMP_CLAUSE_IF */ 1, /* OMP_CLAUSE_NUM_THREADS */ @@ -355,6 +363,12 @@ unsigned const char omp_clause_num_ops[] = 0, /* OMP_CLAUSE_TASKGROUP */ 1, /* OMP_CLAUSE__SIMDUID_ */ 1, /* OMP_CLAUSE__CILK_FOR_COUNT_ */ + 0, /* OMP_CLAUSE_INDEPENDENT */ + 1, /* OMP_CLAUSE_WORKER */ + 1, /* OMP_CLAUSE_VECTOR */ + 1, /* OMP_CLAUSE_NUM_GANGS */ + 1, /* OMP_CLAUSE_NUM_WORKERS */ + 1, /* OMP_CLAUSE_VECTOR_LENGTH */ }; const char * const omp_clause_code_name[] = @@ -374,6 +388,14 @@ const char * const omp_clause_code_name[] = "from", "to", "map", + "_cache_", + "device_resident", + "use_device", + "gang", + "async", + "wait", + "auto", + "seq", "_looptemp_", "if", "num_threads", @@ -399,7 +421,13 @@ const char * const omp_clause_code_name[] = "sections", "taskgroup", "_simduid_", - "_Cilk_for_count_" + "_Cilk_for_count_", + "independent", + "worker", + "vector", + "num_gangs", + "num_workers", + "vector_length" }; @@ -11167,6 +11195,19 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, case OMP_CLAUSE: switch (OMP_CLAUSE_CODE (*tp)) { + case OMP_CLAUSE_GANG: + WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 1)); + /* FALLTHRU */ + + case OMP_CLAUSE_DEVICE_RESIDENT: 
+ case OMP_CLAUSE_USE_DEVICE: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: + case OMP_CLAUSE_WORKER: + case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_NUM_GANGS: + case OMP_CLAUSE_NUM_WORKERS: + case OMP_CLAUSE_VECTOR_LENGTH: case OMP_CLAUSE_PRIVATE: case OMP_CLAUSE_SHARED: case OMP_CLAUSE_FIRSTPRIVATE: @@ -11190,6 +11231,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0)); /* FALLTHRU */ + case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_NOWAIT: case OMP_CLAUSE_ORDERED: case OMP_CLAUSE_DEFAULT: @@ -11202,6 +11244,8 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, case OMP_CLAUSE_PARALLEL: case OMP_CLAUSE_SECTIONS: case OMP_CLAUSE_TASKGROUP: + case OMP_CLAUSE_AUTO: + case OMP_CLAUSE_SEQ: WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); case OMP_CLAUSE_LASTPRIVATE: @@ -11227,6 +11271,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, case OMP_CLAUSE_FROM: case OMP_CLAUSE_TO: case OMP_CLAUSE_MAP: + case OMP_CLAUSE__CACHE_: WALK_SUBTREE (OMP_CLAUSE_DECL (*tp)); WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 1)); WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); diff --git a/gcc/tree.def b/gcc/tree.def index 204c18211ed..b4b41642a6d 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -1030,8 +1030,33 @@ DEFTREECODE (TARGET_MEM_REF, "target_mem_ref", tcc_reference, 5) chain of component references offsetting p by c. */ DEFTREECODE (MEM_REF, "mem_ref", tcc_reference, 2) -/* The ordering of the codes between OMP_PARALLEL and OMP_CRITICAL is - exposed to TREE_RANGE_CHECK. */ +/* OpenACC and OpenMP. As it is exposed in TREE_RANGE_CHECK invocations, do + not change the ordering of these codes. */ + +/* OpenACC - #pragma acc parallel [clause1 ... clauseN] + Operand 0: OACC_PARALLEL_BODY: Code to be executed in parallel. + Operand 1: OACC_PARALLEL_CLAUSES: List of clauses. */ + +DEFTREECODE (OACC_PARALLEL, "oacc_parallel", tcc_statement, 2) + +/* OpenACC - #pragma acc kernels [clause1 ... 
clauseN] + Operand 0: OACC_KERNELS_BODY: Sequence of kernels. + Operand 1: OACC_KERNELS_CLAUSES: List of clauses. */ + +DEFTREECODE (OACC_KERNELS, "oacc_kernels", tcc_statement, 2) + +/* OpenACC - #pragma acc data [clause1 ... clauseN] + Operand 0: OACC_DATA_BODY: Data construct body. + Operand 1: OACC_DATA_CLAUSES: List of clauses. */ + +DEFTREECODE (OACC_DATA, "oacc_data", tcc_statement, 2) + +/* OpenACC - #pragma acc host_data [clause1 ... clauseN] + Operand 0: OACC_HOST_DATA_BODY: Host_data construct body. + Operand 1: OACC_HOST_DATA_CLAUSES: List of clauses. */ + +DEFTREECODE (OACC_HOST_DATA, "oacc_host_data", tcc_statement, 2) + /* OpenMP - #pragma omp parallel [clause1 ... clauseN] Operand 0: OMP_PARALLEL_BODY: Code to be executed by all threads. Operand 1: OMP_PARALLEL_CLAUSES: List of clauses. */ @@ -1062,7 +1087,7 @@ DEFTREECODE (OMP_TASK, "omp_task", tcc_statement, 2) private. N1, N2 and INCR are required to be loop invariant integer expressions that are evaluated without any synchronization. The evaluation order, frequency of evaluation and side-effects are - unspecified by the standard. */ + unspecified by the standards. */ DEFTREECODE (OMP_FOR, "omp_for", tcc_statement, 6) /* OpenMP - #pragma omp simd [clause1 ... clauseN] @@ -1081,6 +1106,10 @@ DEFTREECODE (CILK_FOR, "cilk_for", tcc_statement, 6) Operands like for OMP_FOR. */ DEFTREECODE (OMP_DISTRIBUTE, "omp_distribute", tcc_statement, 6) +/* OpenACC - #pragma acc loop [clause1 ... clauseN] + Operands like for OMP_FOR. */ +DEFTREECODE (OACC_LOOP, "oacc_loop", tcc_statement, 6) + /* OpenMP - #pragma omp teams [clause1 ... clauseN] Operand 0: OMP_TEAMS_BODY: Teams body. Operand 1: OMP_TEAMS_CLAUSES: List of clauses. */ @@ -1127,6 +1156,27 @@ DEFTREECODE (OMP_ORDERED, "omp_ordered", tcc_statement, 1) Operand 1: OMP_CRITICAL_NAME: Identifier for critical section. */ DEFTREECODE (OMP_CRITICAL, "omp_critical", tcc_statement, 2) +/* OpenACC - #pragma acc cache (variable1 ... 
variableN) + Operand 0: OACC_CACHE_CLAUSES: List of variables (transformed into + OMP_CLAUSE__CACHE_ clauses). */ +DEFTREECODE (OACC_CACHE, "oacc_cache", tcc_statement, 1) + +/* OpenACC - #pragma acc declare [clause1 ... clauseN] + Operand 0: OACC_DECLARE_CLAUSES: List of clauses. */ +DEFTREECODE (OACC_DECLARE, "oacc_declare", tcc_statement, 1) + +/* OpenACC - #pragma acc enter data [clause1 ... clauseN] + Operand 0: OACC_ENTER_DATA_CLAUSES: List of clauses. */ +DEFTREECODE (OACC_ENTER_DATA, "oacc_enter_data", tcc_statement, 1) + +/* OpenACC - #pragma acc exit data [clause1 ... clauseN] + Operand 0: OACC_EXIT_DATA_CLAUSES: List of clauses. */ +DEFTREECODE (OACC_EXIT_DATA, "oacc_exit_data", tcc_statement, 1) + +/* OpenACC - #pragma acc update [clause1 ... clauseN] + Operand 0: OACC_UPDATE_CLAUSES: List of clauses. */ +DEFTREECODE (OACC_UPDATE, "oacc_update", tcc_statement, 1) + /* OpenMP - #pragma omp target update [clause1 ... clauseN] Operand 0: OMP_TARGET_UPDATE_CLAUSES: List of clauses. */ DEFTREECODE (OMP_TARGET_UPDATE, "omp_target_update", tcc_statement, 1) diff --git a/gcc/tree.h b/gcc/tree.h index ac2726811fa..4f83b38103b 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1195,12 +1195,47 @@ extern void protected_set_expr_location (tree, location_t); #define TRANSACTION_EXPR_RELAXED(NODE) \ (TRANSACTION_EXPR_CHECK (NODE)->base.public_flag) -/* OpenMP directive and clause accessors. */ +/* OpenMP and OpenACC directive and clause accessors. 
*/ #define OMP_BODY(NODE) \ - TREE_OPERAND (TREE_RANGE_CHECK (NODE, OMP_PARALLEL, OMP_CRITICAL), 0) + TREE_OPERAND (TREE_RANGE_CHECK (NODE, OACC_PARALLEL, OMP_CRITICAL), 0) #define OMP_CLAUSES(NODE) \ - TREE_OPERAND (TREE_RANGE_CHECK (NODE, OMP_PARALLEL, OMP_SINGLE), 1) + TREE_OPERAND (TREE_RANGE_CHECK (NODE, OACC_PARALLEL, OMP_SINGLE), 1) + +#define OACC_PARALLEL_BODY(NODE) \ + TREE_OPERAND (OACC_PARALLEL_CHECK (NODE), 0) +#define OACC_PARALLEL_CLAUSES(NODE) \ + TREE_OPERAND (OACC_PARALLEL_CHECK (NODE), 1) + +#define OACC_KERNELS_BODY(NODE) \ + TREE_OPERAND (OACC_KERNELS_CHECK(NODE), 0) +#define OACC_KERNELS_CLAUSES(NODE) \ + TREE_OPERAND (OACC_KERNELS_CHECK(NODE), 1) + +#define OACC_DATA_BODY(NODE) \ + TREE_OPERAND (OACC_DATA_CHECK (NODE), 0) +#define OACC_DATA_CLAUSES(NODE) \ + TREE_OPERAND (OACC_DATA_CHECK (NODE), 1) + +#define OACC_HOST_DATA_BODY(NODE) \ + TREE_OPERAND (OACC_HOST_DATA_CHECK (NODE), 0) +#define OACC_HOST_DATA_CLAUSES(NODE) \ + TREE_OPERAND (OACC_HOST_DATA_CHECK (NODE), 1) + +#define OACC_CACHE_CLAUSES(NODE) \ + TREE_OPERAND (OACC_CACHE_CHECK (NODE), 0) + +#define OACC_DECLARE_CLAUSES(NODE) \ + TREE_OPERAND (OACC_DECLARE_CHECK (NODE), 0) + +#define OACC_ENTER_DATA_CLAUSES(NODE) \ + TREE_OPERAND (OACC_ENTER_DATA_CHECK (NODE), 0) + +#define OACC_EXIT_DATA_CLAUSES(NODE) \ + TREE_OPERAND (OACC_EXIT_DATA_CHECK (NODE), 0) + +#define OACC_UPDATE_CLAUSES(NODE) \ + TREE_OPERAND (OACC_UPDATE_CHECK (NODE), 0) #define OMP_PARALLEL_BODY(NODE) TREE_OPERAND (OMP_PARALLEL_CHECK (NODE), 0) #define OMP_PARALLEL_CLAUSES(NODE) TREE_OPERAND (OMP_PARALLEL_CHECK (NODE), 1) @@ -1212,7 +1247,7 @@ extern void protected_set_expr_location (tree, location_t); #define OMP_TASKREG_BODY(NODE) TREE_OPERAND (OMP_TASKREG_CHECK (NODE), 0) #define OMP_TASKREG_CLAUSES(NODE) TREE_OPERAND (OMP_TASKREG_CHECK (NODE), 1) -#define OMP_LOOP_CHECK(NODE) TREE_RANGE_CHECK (NODE, OMP_FOR, OMP_DISTRIBUTE) +#define OMP_LOOP_CHECK(NODE) TREE_RANGE_CHECK (NODE, OMP_FOR, OACC_LOOP) #define 
OMP_FOR_BODY(NODE) TREE_OPERAND (OMP_LOOP_CHECK (NODE), 0) #define OMP_FOR_CLAUSES(NODE) TREE_OPERAND (OMP_LOOP_CHECK (NODE), 1) #define OMP_FOR_INIT(NODE) TREE_OPERAND (OMP_LOOP_CHECK (NODE), 2) @@ -1254,7 +1289,7 @@ extern void protected_set_expr_location (tree, location_t); #define OMP_CLAUSE_SIZE(NODE) \ OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), \ OMP_CLAUSE_FROM, \ - OMP_CLAUSE_MAP), 1) + OMP_CLAUSE__CACHE_), 1) #define OMP_CLAUSE_CHAIN(NODE) TREE_CHAIN (OMP_CLAUSE_CHECK (NODE)) #define OMP_CLAUSE_DECL(NODE) \ @@ -1271,6 +1306,15 @@ extern void protected_set_expr_location (tree, location_t); #define OMP_SECTION_LAST(NODE) \ (OMP_SECTION_CHECK (NODE)->base.private_flag) +/* True on an OACC_KERNELS statement if it represents combined kernels loop + directive. */ +#define OACC_KERNELS_COMBINED(NODE) \ + (OACC_KERNELS_CHECK (NODE)->base.private_flag) + +/* Like OACC_KERNELS_COMBINED, but for parallel loop directive. */ +#define OACC_PARALLEL_COMBINED(NODE) \ + (OACC_PARALLEL_CHECK (NODE)->base.private_flag) + /* True on an OMP_PARALLEL statement if it represents an explicit combined parallel work-sharing constructs. 
*/ #define OMP_PARALLEL_COMBINED(NODE) \ @@ -1313,15 +1357,47 @@ extern void protected_set_expr_location (tree, location_t); #define OMP_CLAUSE_SCHEDULE_CHUNK_EXPR(NODE) \ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_SCHEDULE), 0) +/* OpenACC clause expressions */ +#define OMP_CLAUSE_GANG_EXPR(NODE) \ + OMP_CLAUSE_OPERAND ( \ + OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_GANG), 0) +#define OMP_CLAUSE_GANG_STATIC_EXPR(NODE) \ + OMP_CLAUSE_OPERAND ( \ + OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_GANG), 1) +#define OMP_CLAUSE_ASYNC_EXPR(NODE) \ + OMP_CLAUSE_OPERAND ( \ + OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_ASYNC), 0) +#define OMP_CLAUSE_WAIT_EXPR(NODE) \ + OMP_CLAUSE_OPERAND ( \ + OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_WAIT), 0) +#define OMP_CLAUSE_VECTOR_EXPR(NODE) \ + OMP_CLAUSE_OPERAND ( \ + OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_VECTOR), 0) +#define OMP_CLAUSE_WORKER_EXPR(NODE) \ + OMP_CLAUSE_OPERAND ( \ + OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_WORKER), 0) +#define OMP_CLAUSE_NUM_GANGS_EXPR(NODE) \ + OMP_CLAUSE_OPERAND ( \ + OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_NUM_GANGS), 0) +#define OMP_CLAUSE_NUM_WORKERS_EXPR(NODE) \ + OMP_CLAUSE_OPERAND ( \ + OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_NUM_WORKERS), 0) +#define OMP_CLAUSE_VECTOR_LENGTH_EXPR(NODE) \ + OMP_CLAUSE_OPERAND ( \ + OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_VECTOR_LENGTH), 0) + #define OMP_CLAUSE_DEPEND_KIND(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEPEND)->omp_clause.subcode.depend_kind) #define OMP_CLAUSE_MAP_KIND(NODE) \ - (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)->omp_clause.subcode.map_kind) + ((enum gomp_map_kind) OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)->omp_clause.subcode.map_kind) +#define OMP_CLAUSE_SET_MAP_KIND(NODE, MAP_KIND) \ + (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)->omp_clause.subcode.map_kind \ + = (unsigned char) (MAP_KIND)) /* Nonzero if this map clause is for array (rather than pointer) based array - 
section with zero bias. Both the non-decl OMP_CLAUSE_MAP and - correspoidng OMP_CLAUSE_MAP_POINTER clause are marked with this flag. */ + section with zero bias. Both the non-decl OMP_CLAUSE_MAP and corresponding + OMP_CLAUSE_MAP with GOMP_MAP_POINTER are marked with this flag. */ #define OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)->base.public_flag) diff --git a/gcc/varpool.c b/gcc/varpool.c index d43c80b978a..9c8f1ebd426 100644 --- a/gcc/varpool.c +++ b/gcc/varpool.c @@ -173,7 +173,7 @@ varpool_node::get_create (tree decl) node = varpool_node::create_empty (); node->decl = decl; - if (flag_openmp + if ((flag_openacc || flag_openmp) && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))) { node->offloadable = 1; diff --git a/include/ChangeLog b/include/ChangeLog index c1011b9840b..0917d9443bc 100644 --- a/include/ChangeLog +++ b/include/ChangeLog @@ -1,3 +1,9 @@ +2015-01-15 Thomas Schwinge + Julian Brown + James Norris + + * gomp-constants.h: New file. + 2015-12-14 Jan-Benedict Glaw * libiberty.h: Merge Copyright year update from Binutils. diff --git a/include/gomp-constants.h b/include/gomp-constants.h new file mode 100644 index 00000000000..e3d2820d763 --- /dev/null +++ b/include/gomp-constants.h @@ -0,0 +1,116 @@ +/* Communication between GCC and libgomp. + + Copyright (C) 2014-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef GOMP_CONSTANTS_H +#define GOMP_CONSTANTS_H 1 + +/* Memory mapping types. */ + +/* One byte. */ +#define GOMP_MAP_LAST (1 << 8) + +#define GOMP_MAP_FLAG_TO (1 << 0) +#define GOMP_MAP_FLAG_FROM (1 << 1) +/* Special map kinds, enumerated starting here. */ +#define GOMP_MAP_FLAG_SPECIAL_0 (1 << 2) +#define GOMP_MAP_FLAG_SPECIAL_1 (1 << 3) +#define GOMP_MAP_FLAG_SPECIAL (GOMP_MAP_FLAG_SPECIAL_1 \ + | GOMP_MAP_FLAG_SPECIAL_0) +/* Flag to force a specific behavior (or else, trigger a run-time error). */ +#define GOMP_MAP_FLAG_FORCE (1 << 7) + +enum gomp_map_kind + { + /* If not already present, allocate. */ + GOMP_MAP_ALLOC = 0, + /* ..., and copy to device. */ + GOMP_MAP_TO = (GOMP_MAP_ALLOC | GOMP_MAP_FLAG_TO), + /* ..., and copy from device. */ + GOMP_MAP_FROM = (GOMP_MAP_ALLOC | GOMP_MAP_FLAG_FROM), + /* ..., and copy to and from device. */ + GOMP_MAP_TOFROM = (GOMP_MAP_TO | GOMP_MAP_FROM), + /* The following kind is an internal only map kind, used for pointer based + array sections. OMP_CLAUSE_SIZE for these is not the pointer size, + which is implicitly POINTER_SIZE_UNITS, but the bias. */ + GOMP_MAP_POINTER = (GOMP_MAP_FLAG_SPECIAL_0 | 0), + /* Also internal, behaves like GOMP_MAP_TO, but additionally any + GOMP_MAP_POINTER records consecutive after it which have addresses + falling into that range will not be ignored if GOMP_MAP_TO_PSET wasn't + mapped already. */ + GOMP_MAP_TO_PSET = (GOMP_MAP_FLAG_SPECIAL_0 | 1), + /* Must already be present. 
*/ + GOMP_MAP_FORCE_PRESENT = (GOMP_MAP_FLAG_SPECIAL_0 | 2), + /* Deallocate a mapping, without copying from device. */ + GOMP_MAP_FORCE_DEALLOC = (GOMP_MAP_FLAG_SPECIAL_0 | 3), + /* Is a device pointer. OMP_CLAUSE_SIZE for these is unused; is implicitly + POINTER_SIZE_UNITS. */ + GOMP_MAP_FORCE_DEVICEPTR = (GOMP_MAP_FLAG_SPECIAL_1 | 0), + /* Allocate. */ + GOMP_MAP_FORCE_ALLOC = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_ALLOC), + /* ..., and copy to device. */ + GOMP_MAP_FORCE_TO = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_TO), + /* ..., and copy from device. */ + GOMP_MAP_FORCE_FROM = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_FROM), + /* ..., and copy to and from device. */ + GOMP_MAP_FORCE_TOFROM = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_TOFROM) + }; + +#define GOMP_MAP_COPY_TO_P(X) \ + (!((X) & GOMP_MAP_FLAG_SPECIAL) \ + && ((X) & GOMP_MAP_FLAG_TO)) + +#define GOMP_MAP_COPY_FROM_P(X) \ + (!((X) & GOMP_MAP_FLAG_SPECIAL) \ + && ((X) & GOMP_MAP_FLAG_FROM)) + +#define GOMP_MAP_POINTER_P(X) \ + ((X) == GOMP_MAP_POINTER) + + +/* Asynchronous behavior. Keep in sync with + libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t. */ + +#define GOMP_ASYNC_NOVAL -1 +#define GOMP_ASYNC_SYNC -2 + + +/* Device codes. Keep in sync with + libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_device_t as well as + libgomp/libgomp_target.h. */ +#define GOMP_DEVICE_NONE 0 +#define GOMP_DEVICE_DEFAULT 1 +#define GOMP_DEVICE_HOST 2 +#define GOMP_DEVICE_HOST_NONSHM 3 +#define GOMP_DEVICE_NOT_HOST 4 +#define GOMP_DEVICE_NVIDIA_PTX 5 +#define GOMP_DEVICE_INTEL_MIC 6 + +#define GOMP_DEVICE_ICV -1 +#define GOMP_DEVICE_HOST_FALLBACK -2 + +#endif diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 6e1e14169b5..9b003cbb027 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,354 @@ +2015-01-15 Thomas Schwinge + James Norris + Tom de Vries + Julian Brown + Cesar Philippidis + Nathan Sidwell + Tobias Burnus + + * Makefile.am (search_path): Add $(top_srcdir)/../include. 
+ (libgomp_la_SOURCES): Add splay-tree.c, libgomp-plugin.c, + oacc-parallel.c, oacc-host.c, oacc-init.c, oacc-mem.c, + oacc-async.c, oacc-plugin.c, oacc-cuda.c. + [USE_FORTRAN] (libgomp_la_SOURCES): Add openacc.f90. + Include $(top_srcdir)/plugin/Makefrag.am. + (nodist_libsubinclude_HEADERS): Add openacc.h. + [USE_FORTRAN] (nodist_finclude_HEADERS): Add openacc_lib.h, + openacc.f90, openacc.mod, openacc_kinds.mod. + (omp_lib.mod): Generalize into... + (%.mod): ... this new rule. + (openacc_kinds.mod, openacc.mod): New rules. + * plugin/configfrag.ac: New file. + * configure.ac: Move plugin/offloading support into it. Include + it. Instantiate testsuite/libgomp-test-support.pt.exp. + * plugin/Makefrag.am: New file. + * testsuite/Makefile.am (OFFLOAD_TARGETS) + (OFFLOAD_ADDITIONAL_OPTIONS, OFFLOAD_ADDITIONAL_LIB_PATHS): Don't + export. + (libgomp-test-support.exp): New rule. + (all-local): Depend on it. + * Makefile.in: Regenerate. + * testsuite/Makefile.in: Regenerate. + * config.h.in: Likewise. + * configure: Likewise. + * configure.tgt: Harden shell syntax. + * env.c: Include "oacc-int.h". + (parse_acc_device_type): New function. + (gomp_debug_var, goacc_device_type, goacc_device_num): New + variables. + (initialize_env): Initialize those. Call + goacc_runtime_initialize. + * error.c (gomp_vdebug, gomp_debug, gomp_vfatal): New functions. + (gomp_fatal): Call gomp_vfatal. + * libgomp.h: Include "libgomp-plugin.h" and . + (gomp_debug_var, goacc_device_type, goacc_device_num, gomp_vdebug) + (gomp_debug, gomp_verror, gomp_vfatal, gomp_init_targets_once) + (splay_tree_node, splay_tree, splay_tree_key) + (struct target_mem_desc, struct splay_tree_key_s) + (struct gomp_memory_mapping, struct acc_dispatch_t) + (struct gomp_device_descr, gomp_acc_insert_pointer) + (gomp_acc_remove_pointer, target_mem_desc, gomp_copy_from_async) + (gomp_unmap_vars, gomp_init_device, gomp_init_tables) + (gomp_free_memmap, gomp_fini_device): New declarations. 
+ (gomp_vdebug, gomp_debug): New macros. + Include "splay-tree.h". + * libgomp.map (OACC_2.0): New symbol version. Use for + acc_get_num_devices, acc_get_num_devices_h_, acc_set_device_type, + acc_set_device_type_h_, acc_get_device_type, + acc_get_device_type_h_, acc_set_device_num, acc_set_device_num_h_, + acc_get_device_num, acc_get_device_num_h_, acc_async_test, + acc_async_test_h_, acc_async_test_all, acc_async_test_all_h_, + acc_wait, acc_wait_h_, acc_wait_async, acc_wait_async_h_, + acc_wait_all, acc_wait_all_h_, acc_wait_all_async, + acc_wait_all_async_h_, acc_init, acc_init_h_, acc_shutdown, + acc_shutdown_h_, acc_on_device, acc_on_device_h_, acc_malloc, + acc_free, acc_copyin, acc_copyin_32_h_, acc_copyin_64_h_, + acc_copyin_array_h_, acc_present_or_copyin, + acc_present_or_copyin_32_h_, acc_present_or_copyin_64_h_, + acc_present_or_copyin_array_h_, acc_create, acc_create_32_h_, + acc_create_64_h_, acc_create_array_h_, acc_present_or_create, + acc_present_or_create_32_h_, acc_present_or_create_64_h_, + acc_present_or_create_array_h_, acc_copyout, acc_copyout_32_h_, + acc_copyout_64_h_, acc_copyout_array_h_, acc_delete, + acc_delete_32_h_, acc_delete_64_h_, acc_delete_array_h_, + acc_update_device, acc_update_device_32_h_, + acc_update_device_64_h_, acc_update_device_array_h_, + acc_update_self, acc_update_self_32_h_, acc_update_self_64_h_, + acc_update_self_array_h_, acc_map_data, acc_unmap_data, + acc_deviceptr, acc_hostptr, acc_is_present, acc_is_present_32_h_, + acc_is_present_64_h_, acc_is_present_array_h_, + acc_memcpy_to_device, acc_memcpy_from_device, + acc_get_current_cuda_device, acc_get_current_cuda_context, + acc_get_cuda_stream, acc_set_cuda_stream. + (GOACC_2.0): New symbol version. Use for GOACC_data_end, + GOACC_data_start, GOACC_enter_exit_data, GOACC_parallel, + GOACC_update, GOACC_wait, GOACC_get_thread_num, + GOACC_get_num_threads. + (GOMP_PLUGIN_1.0): New symbol version. 
Use for + GOMP_PLUGIN_malloc, GOMP_PLUGIN_malloc_cleared, + GOMP_PLUGIN_realloc, GOMP_PLUGIN_debug, GOMP_PLUGIN_error, + GOMP_PLUGIN_fatal, GOMP_PLUGIN_async_unmap_vars, + GOMP_PLUGIN_acc_thread. + * libgomp.texi: Update for OpenACC changes, and GOMP_DEBUG + environment variable. + * libgomp_g.h (GOACC_data_start, GOACC_data_end) + (GOACC_enter_exit_data, GOACC_parallel, GOACC_update, GOACC_wait) + (GOACC_get_num_threads, GOACC_get_thread_num): New declarations. + * splay-tree.h (splay_tree_lookup, splay_tree_insert) + (splay_tree_remove): New declarations. + (rotate_left, rotate_right, splay_tree_splay, splay_tree_insert) + (splay_tree_remove, splay_tree_lookup): Move into... + * splay-tree.c: ... this new file. + * target.c: Include "oacc-plugin.h", "oacc-int.h", . + (splay_tree_node, splay_tree, splay_tree_key) + (struct target_mem_desc, struct splay_tree_key_s) + (struct gomp_device_descr): Don't declare. + (num_devices_openmp): New variable. + (gomp_get_num_devices ): Use it. + (gomp_init_targets_once): New function. + (gomp_get_num_devices ): Use it. + (get_kind, gomp_copy_from_async, gomp_free_memmap) + (gomp_fini_device, gomp_register_image_for_device): New functions. + (gomp_map_vars): Add devaddrs parameter. + (gomp_update): Add mm parameter. + (gomp_init_device): Move most of it into... + (gomp_init_tables): ... this new function. + (gomp_register_images_for_device): Remove function. + (splay_compare, gomp_map_vars, gomp_unmap_vars, gomp_init_device): + Make them hidden instead of static. + (gomp_map_vars_existing, gomp_map_vars, gomp_unmap_vars) + (gomp_update, gomp_init_device, GOMP_target, GOMP_target_data) + (GOMP_target_end_data, GOMP_target_update) + (gomp_load_plugin_for_device, gomp_target_init): Update for + OpenACC changes. + * oacc-async.c: New file. + * oacc-cuda.c: Likewise. + * oacc-host.c: Likewise. + * oacc-init.c: Likewise. + * oacc-int.h: Likewise. + * oacc-mem.c: Likewise. + * oacc-parallel.c: Likewise. + * oacc-plugin.c: Likewise. 
+ * oacc-plugin.h: Likewise. + * oacc-ptx.h: Likewise. + * openacc.f90: Likewise. + * openacc.h: Likewise. + * openacc_lib.h: Likewise. + * plugin/plugin-host.c: Likewise. + * plugin/plugin-nvptx.c: Likewise. + * libgomp-plugin.c: Likewise. + * libgomp-plugin.h: Likewise. + * libgomp_target.h: Remove file after merging content into the + former file. Update all users. + * testsuite/lib/libgomp.exp: Load libgomp-test-support.exp. + (offload_targets_s, offload_targets_s_openacc): New variables. + (check_effective_target_openacc_nvidia_accel_present) + (check_effective_target_openacc_nvidia_accel_selected): New + procedures. + (libgomp_init): Update for OpenACC changes. + * testsuite/libgomp-test-support.exp.in: New file. + * testsuite/libgomp.oacc-c++/c++.exp: Likewise. + * testsuite/libgomp.oacc-c/c.exp: Likewise. + * testsuite/libgomp.oacc-fortran/fortran.exp: Likewise. + * testsuite/libgomp.oacc-c-c++-common/abort-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/abort-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/abort-3.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/abort-4.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/cache-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/clauses-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/clauses-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/collapse-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/collapse-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/collapse-3.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/collapse-4.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/context-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/context-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/context-3.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/context-4.c: Likewise. 
+ * testsuite/libgomp.oacc-c-c++-common/data-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/data-3.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/data-already-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/data-already-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/data-already-3.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/data-already-4.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/data-already-5.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/data-already-6.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/data-already-7.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/data-already-8.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/if-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-empty.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-10.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-11.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-12.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-13.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-14.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-15.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-16.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-17.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-18.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-19.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-20.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-21.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-22.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-23.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-24.c: Likewise. 
+ * testsuite/libgomp.oacc-c-c++-common/lib-25.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-26.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-27.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-28.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-29.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-3.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-30.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-31.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-32.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-33.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-34.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-35.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-36.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-37.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-38.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-39.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-4.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-40.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-41.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-42.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-43.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-44.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-45.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-46.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-47.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-48.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-49.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-5.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-50.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-51.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-52.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-53.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-54.c: Likewise. 
+ * testsuite/libgomp.oacc-c-c++-common/lib-55.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-56.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-57.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-58.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-59.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-6.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-60.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-61.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-62.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-63.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-64.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-65.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-66.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-67.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-68.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-69.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-7.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-70.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-71.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-72.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-73.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-74.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-75.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-76.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-77.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-78.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-80.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-81.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-82.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-83.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-84.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-85.c: Likewise. 
+ * testsuite/libgomp.oacc-c-c++-common/lib-86.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-87.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-88.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-89.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-9.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-90.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-91.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-92.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/nested-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/nested-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/offset-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/parallel-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/parallel-empty.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/pointer-align-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/present-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/present-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/reduction-initial-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/subr.h: Likewise. + * testsuite/libgomp.oacc-c-c++-common/subr.ptx: Likewise. + * testsuite/libgomp.oacc-c-c++-common/timer.h: Likewise. + * testsuite/libgomp.oacc-c-c++-common/update-1-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/update-1.c: Likewise. + * testsuite/libgomp.oacc-fortran/abort-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/abort-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f: Likewise. 
+ * testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f: Likewise. + * testsuite/libgomp.oacc-fortran/asyncwait-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/asyncwait-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/asyncwait-3.f90: Likewise. + * testsuite/libgomp.oacc-fortran/collapse-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/collapse-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/collapse-3.f90: Likewise. + * testsuite/libgomp.oacc-fortran/collapse-4.f90: Likewise. + * testsuite/libgomp.oacc-fortran/collapse-5.f90: Likewise. + * testsuite/libgomp.oacc-fortran/collapse-6.f90: Likewise. + * testsuite/libgomp.oacc-fortran/collapse-7.f90: Likewise. + * testsuite/libgomp.oacc-fortran/collapse-8.f90: Likewise. + * testsuite/libgomp.oacc-fortran/data-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/data-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/data-3.f90: Likewise. + * testsuite/libgomp.oacc-fortran/data-4-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/data-4.f90: Likewise. + * testsuite/libgomp.oacc-fortran/data-already-1.f: Likewise. + * testsuite/libgomp.oacc-fortran/data-already-2.f: Likewise. + * testsuite/libgomp.oacc-fortran/data-already-3.f: Likewise. + * testsuite/libgomp.oacc-fortran/data-already-4.f: Likewise. + * testsuite/libgomp.oacc-fortran/data-already-5.f: Likewise. + * testsuite/libgomp.oacc-fortran/data-already-6.f: Likewise. + * testsuite/libgomp.oacc-fortran/data-already-7.f: Likewise. + * testsuite/libgomp.oacc-fortran/data-already-8.f: Likewise. + * testsuite/libgomp.oacc-fortran/lib-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/lib-10.f90: Likewise. + * testsuite/libgomp.oacc-fortran/lib-2.f: Likewise. + * testsuite/libgomp.oacc-fortran/lib-3.f: Likewise. + * testsuite/libgomp.oacc-fortran/lib-4.f90: Likewise. + * testsuite/libgomp.oacc-fortran/lib-5.f90: Likewise. + * testsuite/libgomp.oacc-fortran/lib-6.f90: Likewise. + * testsuite/libgomp.oacc-fortran/lib-7.f90: Likewise. 
+ * testsuite/libgomp.oacc-fortran/lib-8.f90: Likewise. + * testsuite/libgomp.oacc-fortran/map-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/openacc_version-1.f: Likewise. + * testsuite/libgomp.oacc-fortran/openacc_version-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/pointer-align-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/pset-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/reduction-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise. + * testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise. + * testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise. + * testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise. + * testsuite/libgomp.oacc-fortran/routine-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/routine-2.f90: Likewise. + * testsuite/libgomp.oacc-fortran/routine-3.f90: Likewise. + * testsuite/libgomp.oacc-fortran/routine-4.f90: Likewise. + * testsuite/libgomp.oacc-fortran/subarrays-1.f90: Likewise. + * testsuite/libgomp.oacc-fortran/subarrays-2.f90: Likewise. 
+ 2015-01-10 Thomas Schwinge Julian Brown David Malcolm diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 427415ee084..5411278b018 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -7,7 +7,8 @@ SUBDIRS = testsuite gcc_version := $(shell cat $(top_srcdir)/../gcc/BASE-VER) config_path = @config_path@ -search_path = $(addprefix $(top_srcdir)/config/, $(config_path)) $(top_srcdir) +search_path = $(addprefix $(top_srcdir)/config/, $(config_path)) $(top_srcdir) \ + $(top_srcdir)/../include fincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/finclude libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include @@ -60,12 +61,21 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS) libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ - time.c fortran.c affinity.c target.c + time.c fortran.c affinity.c target.c splay-tree.c libgomp-plugin.c \ + oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c oacc-async.c \ + oacc-plugin.c oacc-cuda.c + +include $(top_srcdir)/plugin/Makefrag.am + +if USE_FORTRAN +libgomp_la_SOURCES += openacc.f90 +endif nodist_noinst_HEADERS = libgomp_f.h -nodist_libsubinclude_HEADERS = omp.h +nodist_libsubinclude_HEADERS = omp.h openacc.h if USE_FORTRAN -nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod +nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ + openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod endif LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) @@ -75,7 +85,11 @@ LINK = $(LIBTOOL) --tag CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ omp_lib_kinds.mod: omp_lib.mod : -omp_lib.mod: omp_lib.f90 +openacc_kinds.mod: openacc.mod + : +openacc.mod: openacc.lo + : +%.mod: %.f90 $(FC) $(FCFLAGS) -fsyntax-only $< fortran.lo: libgomp_f.h fortran.o: libgomp_f.h diff 
--git a/libgomp/Makefile.in b/libgomp/Makefile.in index 8e4774f6c0e..b61b108e36c 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -15,6 +15,34 @@ @SET_MAKE@ +# Plugins for offload execution, Makefile.am fragment. +# +# Copyright (C) 2014-2015 Free Software Foundation, Inc. +# +# Contributed by Mentor Embedded. +# +# This file is part of the GNU Offloading and Multi Processing Library +# (libgomp). +# +# Libgomp is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. + VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ @@ -36,13 +64,16 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ -subdir = .
-DIST_COMMON = ChangeLog $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ +DIST_COMMON = $(top_srcdir)/plugin/Makefrag.am ChangeLog \ + $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ $(top_srcdir)/configure $(am__configure_deps) \ $(srcdir)/config.h.in $(srcdir)/../mkinstalldirs \ $(srcdir)/omp.h.in $(srcdir)/omp_lib.h.in \ $(srcdir)/omp_lib.f90.in $(srcdir)/libgomp_f.h.in \ $(srcdir)/libgomp.spec.in $(srcdir)/../depcomp +@PLUGIN_NVPTX_TRUE@am__append_1 = libgomp-plugin-nvptx.la +@USE_FORTRAN_TRUE@am__append_2 = openacc.f90 +subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ $(top_srcdir)/../config/depstand.m4 \ @@ -56,7 +87,8 @@ am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ $(top_srcdir)/../config/tls.m4 $(top_srcdir)/../ltoptions.m4 \ $(top_srcdir)/../ltsugar.m4 $(top_srcdir)/../ltversion.m4 \ $(top_srcdir)/../lt~obsolete.m4 $(top_srcdir)/acinclude.m4 \ - $(top_srcdir)/../libtool.m4 $(top_srcdir)/configure.ac + $(top_srcdir)/../libtool.m4 $(top_srcdir)/plugin/configfrag.ac \ + $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ @@ -91,12 +123,38 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(infodir)" \ "$(DESTDIR)$(fincludedir)" "$(DESTDIR)$(libsubincludedir)" \ "$(DESTDIR)$(toolexeclibdir)" LTLIBRARIES = $(toolexeclib_LTLIBRARIES) +libgomp_plugin_host_nonshm_la_DEPENDENCIES = libgomp.la +am_libgomp_plugin_host_nonshm_la_OBJECTS = \ + libgomp_plugin_host_nonshm_la-plugin-host.lo +libgomp_plugin_host_nonshm_la_OBJECTS = \ + $(am_libgomp_plugin_host_nonshm_la_OBJECTS) +libgomp_plugin_host_nonshm_la_LINK = $(LIBTOOL) --tag=CC \ + $(libgomp_plugin_host_nonshm_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libgomp_plugin_host_nonshm_la_LDFLAGS) $(LDFLAGS) -o $@ +am__DEPENDENCIES_1 = 
+@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_DEPENDENCIES = libgomp.la \ +@PLUGIN_NVPTX_TRUE@ $(am__DEPENDENCIES_1) +@PLUGIN_NVPTX_TRUE@am_libgomp_plugin_nvptx_la_OBJECTS = \ +@PLUGIN_NVPTX_TRUE@ libgomp_plugin_nvptx_la-plugin-nvptx.lo +libgomp_plugin_nvptx_la_OBJECTS = \ + $(am_libgomp_plugin_nvptx_la_OBJECTS) +libgomp_plugin_nvptx_la_LINK = $(LIBTOOL) --tag=CC \ + $(libgomp_plugin_nvptx_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libgomp_plugin_nvptx_la_LDFLAGS) $(LDFLAGS) -o $@ +@PLUGIN_NVPTX_TRUE@am_libgomp_plugin_nvptx_la_rpath = -rpath \ +@PLUGIN_NVPTX_TRUE@ $(toolexeclibdir) libgomp_la_LIBADD = +@USE_FORTRAN_TRUE@am__objects_1 = openacc.lo am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \ error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \ parallel.lo sections.lo single.lo task.lo team.lo work.lo \ lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \ - fortran.lo affinity.lo target.lo + fortran.lo affinity.lo target.lo splay-tree.lo \ + libgomp-plugin.lo oacc-parallel.lo oacc-host.lo oacc-init.lo \ + oacc-mem.lo oacc-async.lo oacc-plugin.lo oacc-cuda.lo \ + $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/../depcomp @@ -108,7 +166,15 @@ LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) CCLD = $(CC) -SOURCES = $(libgomp_la_SOURCES) +FCCOMPILE = $(FC) $(AM_FCFLAGS) $(FCFLAGS) +LTFCCOMPILE = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(FC) $(AM_FCFLAGS) $(FCFLAGS) +FCLD = $(FC) +FCLINK = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(libgomp_plugin_host_nonshm_la_SOURCES) \ + $(libgomp_plugin_nvptx_la_SOURCES) $(libgomp_la_SOURCES) MULTISRCTOP = 
MULTIBUILDTOP = MULTIDIRS = @@ -155,6 +221,8 @@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ +CUDA_DRIVER_INCLUDE = @CUDA_DRIVER_INCLUDE@ +CUDA_DRIVER_LIB = @CUDA_DRIVER_LIB@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ @@ -213,6 +281,10 @@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PERL = @PERL@ +PLUGIN_NVPTX = @PLUGIN_NVPTX@ +PLUGIN_NVPTX_CPPFLAGS = @PLUGIN_NVPTX_CPPFLAGS@ +PLUGIN_NVPTX_LDFLAGS = @PLUGIN_NVPTX_LDFLAGS@ +PLUGIN_NVPTX_LIBS = @PLUGIN_NVPTX_LIBS@ RANLIB = @RANLIB@ SECTION_LDFLAGS = @SECTION_LDFLAGS@ SED = @SED@ @@ -293,13 +365,16 @@ top_srcdir = @top_srcdir@ ACLOCAL_AMFLAGS = -I .. -I ../config SUBDIRS = testsuite gcc_version := $(shell cat $(top_srcdir)/../gcc/BASE-VER) -search_path = $(addprefix $(top_srcdir)/config/, $(config_path)) $(top_srcdir) +search_path = $(addprefix $(top_srcdir)/config/, $(config_path)) $(top_srcdir) \ + $(top_srcdir)/../include + fincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/finclude libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include AM_CPPFLAGS = $(addprefix -I, $(search_path)) AM_CFLAGS = $(XCFLAGS) AM_LDFLAGS = $(XLDFLAGS) $(SECTION_LDFLAGS) $(OPT_LDFLAGS) -toolexeclib_LTLIBRARIES = libgomp.la +toolexeclib_LTLIBRARIES = libgomp.la $(am__append_1) \ + libgomp-plugin-host_nonshm.la nodist_toolexeclib_HEADERS = libgomp.spec # -Wc is only a libtool option. 
@@ -318,13 +393,35 @@ libgomp_la_LDFLAGS = $(libgomp_version_info) $(libgomp_version_script) \ libgomp_la_DEPENDENCIES = $(libgomp_version_dep) libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS) libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ - iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ - task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ - time.c fortran.c affinity.c target.c + iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c \ + single.c task.c team.c work.c lock.c mutex.c proc.c sem.c \ + bar.c ptrlock.c time.c fortran.c affinity.c target.c \ + splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ + oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ + $(am__append_2) +# Nvidia PTX OpenACC plugin. +@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) +@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_SOURCES = plugin/plugin-nvptx.c +@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_CPPFLAGS = $(AM_CPPFLAGS) $(PLUGIN_NVPTX_CPPFLAGS) +@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_LDFLAGS = \ +@PLUGIN_NVPTX_TRUE@ $(libgomp_plugin_nvptx_version_info) \ +@PLUGIN_NVPTX_TRUE@ $(lt_host_flags) $(PLUGIN_NVPTX_LDFLAGS) +@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_LIBADD = libgomp.la $(PLUGIN_NVPTX_LIBS) +@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_LIBTOOLFLAGS = --tag=disable-static +libgomp_plugin_host_nonshm_version_info = -version-info $(libtool_VERSION) +libgomp_plugin_host_nonshm_la_SOURCES = plugin/plugin-host.c +libgomp_plugin_host_nonshm_la_CPPFLAGS = $(AM_CPPFLAGS) -DHOST_NONSHM_PLUGIN +libgomp_plugin_host_nonshm_la_LDFLAGS = \ + $(libgomp_plugin_host_nonshm_version_info) $(lt_host_flags) + +libgomp_plugin_host_nonshm_la_LIBADD = libgomp.la +libgomp_plugin_host_nonshm_la_LIBTOOLFLAGS = --tag=disable-static nodist_noinst_HEADERS = libgomp_f.h -nodist_libsubinclude_HEADERS = omp.h -@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 
omp_lib.mod omp_lib_kinds.mod +nodist_libsubinclude_HEADERS = omp.h openacc.h +@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ +@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod + LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) LINK = $(LIBTOOL) --tag CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LTLDFLAGS) -o $@ @@ -354,10 +451,10 @@ all: config.h $(MAKE) $(AM_MAKEFLAGS) all-recursive .SUFFIXES: -.SUFFIXES: .c .dvi .lo .o .obj .ps +.SUFFIXES: .c .dvi .f90 .lo .o .obj .ps am--refresh: @: -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/plugin/Makefrag.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -447,6 +544,10 @@ clean-toolexeclibLTLIBRARIES: echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done +libgomp-plugin-host_nonshm.la: $(libgomp_plugin_host_nonshm_la_OBJECTS) $(libgomp_plugin_host_nonshm_la_DEPENDENCIES) + $(libgomp_plugin_host_nonshm_la_LINK) -rpath $(toolexeclibdir) $(libgomp_plugin_host_nonshm_la_OBJECTS) $(libgomp_plugin_host_nonshm_la_LIBADD) $(LIBS) +libgomp-plugin-nvptx.la: $(libgomp_plugin_nvptx_la_OBJECTS) $(libgomp_plugin_nvptx_la_DEPENDENCIES) + $(libgomp_plugin_nvptx_la_LINK) $(am_libgomp_plugin_nvptx_la_rpath) $(libgomp_plugin_nvptx_la_OBJECTS) $(libgomp_plugin_nvptx_la_LIBADD) $(LIBS) libgomp.la: $(libgomp_la_OBJECTS) $(libgomp_la_DEPENDENCIES) $(libgomp_la_LINK) -rpath $(toolexeclibdir) $(libgomp_la_OBJECTS) $(libgomp_la_LIBADD) $(LIBS) @@ -466,10 +567,20 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fortran.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_host_nonshm_la-plugin-host.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop_ull.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mutex.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-async.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-cuda.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-host.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-init.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@ @@ -477,6 +588,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@ @@ -504,6 +616,29 @@ distclean-compile: @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< +libgomp_plugin_host_nonshm_la-plugin-host.lo: plugin/plugin-host.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_host_nonshm_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_host_nonshm_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_host_nonshm_la-plugin-host.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_host_nonshm_la-plugin-host.Tpo -c -o libgomp_plugin_host_nonshm_la-plugin-host.lo `test -f 'plugin/plugin-host.c' || echo '$(srcdir)/'`plugin/plugin-host.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libgomp_plugin_host_nonshm_la-plugin-host.Tpo $(DEPDIR)/libgomp_plugin_host_nonshm_la-plugin-host.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='plugin/plugin-host.c' object='libgomp_plugin_host_nonshm_la-plugin-host.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_host_nonshm_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_host_nonshm_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libgomp_plugin_host_nonshm_la-plugin-host.lo `test -f 'plugin/plugin-host.c' || echo '$(srcdir)/'`plugin/plugin-host.c + +libgomp_plugin_nvptx_la-plugin-nvptx.lo: plugin/plugin-nvptx.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_nvptx_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_nvptx_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_nvptx_la-plugin-nvptx.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Tpo -c -o libgomp_plugin_nvptx_la-plugin-nvptx.lo `test -f 'plugin/plugin-nvptx.c' || echo '$(srcdir)/'`plugin/plugin-nvptx.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Tpo 
$(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='plugin/plugin-nvptx.c' object='libgomp_plugin_nvptx_la-plugin-nvptx.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_nvptx_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_nvptx_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libgomp_plugin_nvptx_la-plugin-nvptx.lo `test -f 'plugin/plugin-nvptx.c' || echo '$(srcdir)/'`plugin/plugin-nvptx.c + +.f90.o: + $(FCCOMPILE) -c -o $@ $< + +.f90.obj: + $(FCCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.f90.lo: + $(LTFCCOMPILE) -c -o $@ $< + mostlyclean-libtool: -rm -f *.lo @@ -1088,7 +1223,11 @@ vpath % $(strip $(search_path)) omp_lib_kinds.mod: omp_lib.mod : -omp_lib.mod: omp_lib.f90 +openacc_kinds.mod: openacc.mod + : +openacc.mod: openacc.lo + : +%.mod: %.f90 $(FC) $(FCFLAGS) -fsyntax-only $< fortran.lo: libgomp_f.h fortran.o: libgomp_f.h diff --git a/libgomp/config.h.in b/libgomp/config.h.in index a5e27ca4eaf..02547b16341 100644 --- a/libgomp/config.h.in +++ b/libgomp/config.h.in @@ -79,9 +79,6 @@ /* Define to 1 if the target supports thread-local storage. */ #undef HAVE_TLS -/* Define to 1 if the target use emutls for thread-local storage. */ -#undef USE_EMUTLS - /* Define to 1 if you have the <unistd.h> header file. */ #undef HAVE_UNISTD_H @@ -116,6 +113,9 @@ /* Define to the version of this package. */ #undef PACKAGE_VERSION +/* Define to 1 if the NVIDIA plugin is built, 0 if not. */ +#undef PLUGIN_NVPTX + /* Define if all infrastructure, needed for plugins, is supported. */ #undef PLUGIN_SUPPORT @@ -143,5 +143,8 @@ /* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */ #undef TIME_WITH_SYS_TIME +/* Define to 1 if the target use emutls for thread-local storage.
*/ +#undef USE_EMUTLS + /* Version number of package */ #undef VERSION diff --git a/libgomp/configure b/libgomp/configure index 3214e9dd84f..081870728e7 100755 --- a/libgomp/configure +++ b/libgomp/configure @@ -616,9 +616,6 @@ OMP_LOCK_SIZE USE_FORTRAN_FALSE USE_FORTRAN_TRUE link_gomp -offload_additional_lib_paths -offload_additional_options -offload_targets XLDFLAGS XCFLAGS config_path @@ -630,6 +627,17 @@ LIBGOMP_BUILD_VERSIONED_SHLIB_FALSE LIBGOMP_BUILD_VERSIONED_SHLIB_TRUE OPT_LDFLAGS SECTION_LDFLAGS +PLUGIN_NVPTX_FALSE +PLUGIN_NVPTX_TRUE +offload_additional_lib_paths +offload_additional_options +PLUGIN_NVPTX_LIBS +PLUGIN_NVPTX_LDFLAGS +PLUGIN_NVPTX_CPPFLAGS +PLUGIN_NVPTX +CUDA_DRIVER_LIB +CUDA_DRIVER_INCLUDE +offload_targets libtool_VERSION ac_ct_FC FCFLAGS @@ -770,6 +778,9 @@ enable_fast_install with_gnu_ld enable_libtool_lock enable_maintainer_mode +with_cuda_driver +with_cuda_driver_include +with_cuda_driver_lib enable_linux_futex enable_tls enable_symvers @@ -1431,6 +1442,16 @@ Optional Packages: --with-pic try to use only PIC/non-PIC objects [default=use both] --with-gnu-ld assume the C compiler uses GNU ld [default=no] + --with-cuda-driver=PATH specify prefix directory for installed CUDA driver + package. 
Equivalent to + --with-cuda-driver-include=PATH/include plus + --with-cuda-driver-lib=PATH/lib + --with-cuda-driver-include=PATH + specify directory for installed CUDA driver include + files + --with-cuda-driver-lib=PATH + specify directory for the installed CUDA driver + library Some influential environment variables: CC C compiler command @@ -11097,7 +11118,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11100 "configure" +#line 11121 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -11203,7 +11224,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11206 "configure" +#line 11227 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -15055,6 +15076,36 @@ fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext +# Plugins for offload execution, configure.ac fragment. -*- mode: autoconf -*- +# +# Copyright (C) 2014-2015 Free Software Foundation, Inc. +# +# Contributed by Mentor Embedded. +# +# This file is part of the GNU Offloading and Multi Processing Library +# (libgomp). +# +# Libgomp is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. 
+# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. + +offload_targets= + plugin_support=yes { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlsym in -ldl" >&5 $as_echo_n "checking for dlsym in -ldl... " >&6; } @@ -15107,8 +15158,152 @@ if test x"$plugin_support" = xyes; then $as_echo "#define PLUGIN_SUPPORT 1" >>confdefs.h + offload_targets=host_nonshm +elif test "x${enable_offload_targets-no}" != xno; then + as_fn_error "Can't support offloading without support for plugins" "$LINENO" 5 fi +# Look for the CUDA driver package. +CUDA_DRIVER_INCLUDE= +CUDA_DRIVER_LIB= + + +CUDA_DRIVER_CPPFLAGS= +CUDA_DRIVER_LDFLAGS= + +# Check whether --with-cuda-driver was given. +if test "${with_cuda_driver+set}" = set; then : + withval=$with_cuda_driver; +fi + + +# Check whether --with-cuda-driver-include was given. +if test "${with_cuda_driver_include+set}" = set; then : + withval=$with_cuda_driver_include; +fi + + +# Check whether --with-cuda-driver-lib was given. +if test "${with_cuda_driver_lib+set}" = set; then : + withval=$with_cuda_driver_lib; +fi + +if test "x$with_cuda_driver" != x; then + CUDA_DRIVER_INCLUDE=$with_cuda_driver/include + CUDA_DRIVER_LIB=$with_cuda_driver/lib +fi +if test "x$with_cuda_driver_include" != x; then + CUDA_DRIVER_INCLUDE=$with_cuda_driver_include +fi +if test "x$with_cuda_driver_lib" != x; then + CUDA_DRIVER_LIB=$with_cuda_driver_lib +fi +if test "x$CUDA_DRIVER_INCLUDE" != x; then + CUDA_DRIVER_CPPFLAGS=-I$CUDA_DRIVER_INCLUDE +fi +if test "x$CUDA_DRIVER_LIB" != x; then + CUDA_DRIVER_LDFLAGS=-L$CUDA_DRIVER_LIB +fi + +PLUGIN_NVPTX=0 +PLUGIN_NVPTX_CPPFLAGS= +PLUGIN_NVPTX_LDFLAGS= +PLUGIN_NVPTX_LIBS= + + + + + +# Get offload targets and path to install tree of offloading compiler.
+offload_additional_options= +offload_additional_lib_paths= + + +if test x"$enable_offload_targets" != x; then + for tgt in `echo $enable_offload_targets | sed -e 's#,# #g'`; do + tgt_dir=`echo $tgt | grep '=' | sed 's/.*=//'` + tgt=`echo $tgt | sed 's/=.*//'` + case $tgt in + *-intelmic-* | *-intelmicemul-*) + tgt_name=intelmic + ;; + nvptx*) + tgt_name=nvptx + PLUGIN_NVPTX=$tgt + PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS + PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS + PLUGIN_NVPTX_LIBS='-lcuda' + + PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS + CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS" + PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS + LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS" + PLUGIN_NVPTX_save_LIBS=$LIBS + LIBS="$PLUGIN_NVPTX_LIBS $LIBS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include "cuda.h" +int +main () +{ +CUresult r = cuCtxPushCurrent (NULL); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + PLUGIN_NVPTX=1 +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS + LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS + LIBS=$PLUGIN_NVPTX_save_LIBS + case $PLUGIN_NVPTX in + nvptx*) + PLUGIN_NVPTX=0 + as_fn_error "CUDA driver package required for nvptx support" "$LINENO" 5 + ;; + esac + ;; + *) + as_fn_error "unknown offload target specified" "$LINENO" 5 + ;; + esac + if test x"$offload_targets" = x; then + offload_targets=$tgt_name + else + offload_targets=$offload_targets,$tgt_name + fi + if test x"$tgt_dir" != x; then + offload_additional_options="$offload_additional_options -B$tgt_dir/libexec/gcc/\$(target_alias)/\$(gcc_version) -B$tgt_dir/bin" + offload_additional_lib_paths="$offload_additional_lib_paths:$tgt_dir/lib64:$tgt_dir/lib:$tgt_dir/lib32" + else + offload_additional_options="$offload_additional_options -B\$(libexecdir)/gcc/\$(target_alias)/\$(gcc_version) -B\$(bindir)" + offload_additional_lib_paths="$offload_additional_lib_paths:$toolexeclibdir" + fi + done +fi 
+ +cat >>confdefs.h <<_ACEOF +#define OFFLOAD_TARGETS "$offload_targets" +_ACEOF + + if test $PLUGIN_NVPTX = 1; then + PLUGIN_NVPTX_TRUE= + PLUGIN_NVPTX_FALSE='#' +else + PLUGIN_NVPTX_TRUE='#' + PLUGIN_NVPTX_FALSE= +fi + + +cat >>confdefs.h <<_ACEOF +#define PLUGIN_NVPTX $PLUGIN_NVPTX +_ACEOF + + + # Check for functions needed. for ac_func in getloadavg clock_gettime strtoull do : @@ -16241,43 +16436,6 @@ else multilib_arg= fi -# Get accel target and path to install tree of accel compiler -offload_additional_options= -offload_additional_lib_paths= -offload_targets= -if test x"$enable_offload_targets" != x; then - for tgt in `echo $enable_offload_targets | sed -e 's#,# #g'`; do - tgt_dir=`echo $tgt | grep '=' | sed 's/.*=//'` - tgt=`echo $tgt | sed 's/=.*//'` - case $tgt in - *-intelmic-* | *-intelmicemul-*) - tgt_name="intelmic" ;; - *) - as_fn_error "unknown offload target specified" "$LINENO" 5 ;; - esac - if test x"$offload_targets" = x; then - offload_targets=$tgt_name - else - offload_targets=$offload_targets,$tgt_name - fi - if test x"$tgt_dir" != x; then - offload_additional_options="$offload_additional_options -B$tgt_dir/libexec/gcc/\$(target_alias)/\$(gcc_version) -B$tgt_dir/bin" - offload_additional_lib_paths="$offload_additional_lib_paths:$tgt_dir/lib64:$tgt_dir/lib:$tgt_dir/lib32" - else - offload_additional_options="$offload_additional_options -B\$(libexecdir)/gcc/\$(target_alias)/\$(gcc_version) -B\$(bindir)" - offload_additional_lib_paths="$offload_additional_lib_paths:$toolexeclibdir" - fi - done -fi - -cat >>confdefs.h <<_ACEOF -#define OFFLOAD_TARGETS "$offload_targets" -_ACEOF - - - - - # Set up the set of libraries that we need to link against for libgomp. # Note that the GOMP_SELF_SPEC in gcc.c may force -pthread, # which will force linkage against -lpthread (or equivalent for the system). 
@@ -16395,6 +16553,8 @@ ac_config_files="$ac_config_files omp.h omp_lib.h omp_lib.f90 libgomp_f.h" ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec" +ac_config_files="$ac_config_files testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in" + cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure @@ -16520,6 +16680,10 @@ if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then as_fn_error "conditional \"MAINTAINER_MODE\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${PLUGIN_NVPTX_TRUE}" && test -z "${PLUGIN_NVPTX_FALSE}"; then + as_fn_error "conditional \"PLUGIN_NVPTX\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${LIBGOMP_BUILD_VERSIONED_SHLIB_TRUE}" && test -z "${LIBGOMP_BUILD_VERSIONED_SHLIB_FALSE}"; then as_fn_error "conditional \"LIBGOMP_BUILD_VERSIONED_SHLIB\" was never defined. Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 @@ -17535,6 +17699,7 @@ do "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; "testsuite/Makefile") CONFIG_FILES="$CONFIG_FILES testsuite/Makefile" ;; "libgomp.spec") CONFIG_FILES="$CONFIG_FILES libgomp.spec" ;; + "testsuite/libgomp-test-support.pt.exp") CONFIG_FILES="$CONFIG_FILES testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in" ;; *) as_fn_error "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac diff --git a/libgomp/configure.ac b/libgomp/configure.ac index 8ed1baebcb8..4687b01d5cc 100644 --- a/libgomp/configure.ac +++ b/libgomp/configure.ac @@ -193,12 +193,7 @@ AC_LINK_IFELSE( [], [AC_MSG_ERROR([Pthreads are required to build libgomp])])]) -plugin_support=yes -AC_CHECK_LIB(dl, dlsym, , [plugin_support=no]) -if test x"$plugin_support" = xyes; then - AC_DEFINE(PLUGIN_SUPPORT, 1, - [Define if all infrastructure, needed for plugins, is supported.]) -fi +m4_include([plugin/configfrag.ac]) # Check for functions needed. AC_CHECK_FUNCS(getloadavg clock_gettime strtoull) @@ -283,40 +278,6 @@ else multilib_arg= fi -# Get accel target and path to install tree of accel compiler -offload_additional_options= -offload_additional_lib_paths= -offload_targets= -if test x"$enable_offload_targets" != x; then - for tgt in `echo $enable_offload_targets | sed -e 's#,# #g'`; do - tgt_dir=`echo $tgt | grep '=' | sed 's/.*=//'` - tgt=`echo $tgt | sed 's/=.*//'` - case $tgt in - *-intelmic-* | *-intelmicemul-*) - tgt_name="intelmic" ;; - *) - AC_MSG_ERROR([unknown offload target specified]) ;; - esac - if test x"$offload_targets" = x; then - offload_targets=$tgt_name - else - offload_targets=$offload_targets,$tgt_name - fi - if test x"$tgt_dir" != x; then - offload_additional_options="$offload_additional_options -B$tgt_dir/libexec/gcc/\$(target_alias)/\$(gcc_version) -B$tgt_dir/bin" - offload_additional_lib_paths="$offload_additional_lib_paths:$tgt_dir/lib64:$tgt_dir/lib:$tgt_dir/lib32" - else - 
offload_additional_options="$offload_additional_options -B\$(libexecdir)/gcc/\$(target_alias)/\$(gcc_version) -B\$(bindir)" - offload_additional_lib_paths="$offload_additional_lib_paths:$toolexeclibdir" - fi - done -fi -AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets", - [Define to hold the list of target names suitable for offloading.]) -AC_SUBST(offload_targets) -AC_SUBST(offload_additional_options) -AC_SUBST(offload_additional_lib_paths) - # Set up the set of libraries that we need to link against for libgomp. # Note that the GOMP_SELF_SPEC in gcc.c may force -pthread, # which will force linkage against -lpthread (or equivalent for the system). @@ -391,4 +352,5 @@ CFLAGS="$save_CFLAGS" AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h) AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec) +AC_CONFIG_FILES([testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in]) AC_OUTPUT diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt index ebd9be97d08..2ef49264e54 100644 --- a/libgomp/configure.tgt +++ b/libgomp/configure.tgt @@ -27,7 +27,7 @@ fi config_path="posix" # Check for futex enabled all at once. -if test $enable_linux_futex = yes; then +if test x$enable_linux_futex = xyes; then case "${target}" in aarch64*-*-linux*) diff --git a/libgomp/env.c b/libgomp/env.c index b05b73a6e20..6b5e963c4ea 100644 --- a/libgomp/env.c +++ b/libgomp/env.c @@ -28,6 +28,7 @@ #include "libgomp.h" #include "libgomp_f.h" +#include "oacc-int.h" #include #include #include @@ -77,6 +78,9 @@ char *gomp_bind_var_list; unsigned long gomp_bind_var_list_len; void **gomp_places_list; unsigned long gomp_places_list_len; +int gomp_debug_var; +char *goacc_device_type; +int goacc_device_num; /* Parse the OMP_SCHEDULE environment variable. 
*/ @@ -1012,6 +1016,16 @@ parse_affinity (bool ignore) return false; } +static void +parse_acc_device_type (void) +{ + const char *env = getenv ("ACC_DEVICE_TYPE"); + + if (env && *env != '\0') + goacc_device_type = strdup (env); + else + goacc_device_type = NULL; +} static void handle_omp_display_env (unsigned long stacksize, int wait_policy) @@ -1182,6 +1196,7 @@ initialize_env (void) gomp_global_icv.thread_limit_var = thread_limit_var > INT_MAX ? UINT_MAX : thread_limit_var; } + parse_int ("GOMP_DEBUG", &gomp_debug_var, true); #ifndef HAVE_SYNC_BUILTINS gomp_mutex_init (&gomp_managed_threads_lock); #endif @@ -1272,6 +1287,15 @@ initialize_env (void) } handle_omp_display_env (stacksize, wait_policy); + + /* OpenACC. */ + + if (!parse_int ("ACC_DEVICE_NUM", &goacc_device_num, true)) + goacc_device_num = 0; + + parse_acc_device_type (); + + goacc_runtime_initialize (); } diff --git a/libgomp/error.c b/libgomp/error.c index e61d82f53d9..094c24a38c0 100644 --- a/libgomp/error.c +++ b/libgomp/error.c @@ -36,7 +36,26 @@ #include -static void +#undef gomp_vdebug +void +gomp_vdebug (int kind __attribute__ ((unused)), const char *msg, va_list list) +{ + if (gomp_debug_var) + vfprintf (stderr, msg, list); +} + +#undef gomp_debug +void +gomp_debug (int kind, const char *msg, ...) +{ + va_list list; + + va_start (list, msg); + gomp_vdebug (kind, msg, list); + va_end (list); +} + +void gomp_verror (const char *fmt, va_list list) { fputs ("\nlibgomp: ", stderr); @@ -54,14 +73,19 @@ gomp_error (const char *fmt, ...) va_end (list); } +void +gomp_vfatal (const char *fmt, va_list list) +{ + gomp_verror (fmt, list); + exit (EXIT_FAILURE); +} + void gomp_fatal (const char *fmt, ...) 
{ va_list list; va_start (list, fmt); - gomp_verror (fmt, list); + gomp_vfatal (fmt, list); va_end (list); - - exit (EXIT_FAILURE); } diff --git a/libgomp/libgomp-plugin.c b/libgomp/libgomp-plugin.c new file mode 100644 index 00000000000..ffb22e93862 --- /dev/null +++ b/libgomp/libgomp-plugin.c @@ -0,0 +1,82 @@ +/* Copyright (C) 2014-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* Exported (non-hidden) functions exposing libgomp interface for plugins. */ + +#include + +#include "libgomp.h" +#include "libgomp-plugin.h" + +void * +GOMP_PLUGIN_malloc (size_t size) +{ + return gomp_malloc (size); +} + +void * +GOMP_PLUGIN_malloc_cleared (size_t size) +{ + return gomp_malloc_cleared (size); +} + +void * +GOMP_PLUGIN_realloc (void *ptr, size_t size) +{ + return gomp_realloc (ptr, size); +} + +void +GOMP_PLUGIN_debug (int kind, const char *msg, ...) +{ + va_list ap; + + va_start (ap, msg); + /* Forward the va_list: gomp_debug is itself variadic and would consume + AP as if it were the first format argument. */ + gomp_vdebug (kind, msg, ap); + va_end (ap); +} + +void +GOMP_PLUGIN_error (const char *msg, ...) 
+{ + va_list ap; + + va_start (ap, msg); + gomp_verror (msg, ap); + va_end (ap); +} + +void +GOMP_PLUGIN_fatal (const char *msg, ...) +{ + va_list ap; + + va_start (ap, msg); + gomp_vfatal (msg, ap); + va_end (ap); +} diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h new file mode 100644 index 00000000000..d9cbff5fe71 --- /dev/null +++ b/libgomp/libgomp-plugin.h @@ -0,0 +1,80 @@ +/* Copyright (C) 2014-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* An interface to various libgomp-internal functions for use by plugins. */ + +#ifndef LIBGOMP_PLUGIN_H +#define LIBGOMP_PLUGIN_H 1 + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* Capabilities of offloading devices. */ +#define GOMP_OFFLOAD_CAP_SHARED_MEM (1 << 0) +#define GOMP_OFFLOAD_CAP_NATIVE_EXEC (1 << 1) +#define GOMP_OFFLOAD_CAP_OPENMP_400 (1 << 2) +#define GOMP_OFFLOAD_CAP_OPENACC_200 (1 << 3) + +/* Type of offload target device. Keep in sync with include/gomp-constants.h. 
*/ +enum offload_target_type +{ + OFFLOAD_TARGET_TYPE_HOST = 2, + OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3, + OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5, + OFFLOAD_TARGET_TYPE_INTEL_MIC = 6 +}; + +/* Auxiliary struct, used for transferring a host-target address range mapping + from plugin to libgomp. */ +struct mapping_table +{ + uintptr_t host_start; + uintptr_t host_end; + uintptr_t tgt_start; + uintptr_t tgt_end; +}; + +/* Miscellaneous functions. */ +extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc)); +extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc)); +extern void *GOMP_PLUGIN_realloc (void *, size_t); + +extern void GOMP_PLUGIN_debug (int, const char *, ...) + __attribute__ ((format (printf, 2, 3))); +extern void GOMP_PLUGIN_error (const char *, ...) + __attribute__ ((format (printf, 1, 2))); +extern void GOMP_PLUGIN_fatal (const char *, ...) + __attribute__ ((noreturn, format (printf, 1, 2))); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 05f34967242..3089401c47f 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -24,9 +24,10 @@ . */ /* This file contains data types and function declarations that are not - part of the official OpenMP user interface. There are declarations - in here that are part of the GNU OpenMP ABI, in that the compiler is - required to know about them and use them. + part of the official OpenACC or OpenMP user interfaces. There are + declarations in here that are part of the GNU Offloading and Multi + Processing ABI, in that the compiler is required to know about them + and use them. 
The convention is that the all caps prefix "GOMP" is used group items that are part of the external ABI, and the lower case prefix "gomp" @@ -37,10 +38,12 @@ #include "config.h" #include "gstdint.h" +#include "libgomp-plugin.h" #include #include #include +#include #ifdef HAVE_ATTRIBUTE_VISIBILITY # pragma GCC visibility push(hidden) @@ -221,6 +224,7 @@ struct gomp_team_state }; struct target_mem_desc; +struct gomp_memory_mapping; /* These are the OpenMP 4.0 Internal Control Variables described in section 2.3.1. Those described as having one copy per task are @@ -254,6 +258,9 @@ extern char *gomp_bind_var_list; extern unsigned long gomp_bind_var_list_len; extern void **gomp_places_list; extern unsigned long gomp_places_list_len; +extern int gomp_debug_var; +extern int goacc_device_num; +extern char *goacc_device_type; enum gomp_task_kind { @@ -533,10 +540,26 @@ extern void *gomp_realloc (void *, size_t); /* error.c */ +extern void gomp_vdebug (int, const char *, va_list); +extern void gomp_debug (int, const char *, ...) + __attribute__ ((format (printf, 2, 3))); +#define gomp_vdebug(KIND, FMT, VALIST) \ + do { \ + if (__builtin_expect (gomp_debug_var, 0)) \ + (gomp_vdebug) ((KIND), (FMT), (VALIST)); \ + } while (0) +#define gomp_debug(KIND, ...) \ + do { \ + if (__builtin_expect (gomp_debug_var, 0)) \ + (gomp_debug) ((KIND), __VA_ARGS__); \ + } while (0) +extern void gomp_verror (const char *, va_list); extern void gomp_error (const char *, ...) - __attribute__((format (printf, 1, 2))); + __attribute__ ((format (printf, 1, 2))); +extern void gomp_vfatal (const char *, va_list) + __attribute__ ((noreturn)); extern void gomp_fatal (const char *, ...) 
- __attribute__((noreturn, format (printf, 1, 2))); + __attribute__ ((noreturn, format (printf, 1, 2))); /* iter.c */ @@ -607,8 +630,192 @@ extern void gomp_free_thread (void *); /* target.c */ +extern void gomp_init_targets_once (void); extern int gomp_get_num_devices (void); +typedef struct splay_tree_node_s *splay_tree_node; +typedef struct splay_tree_s *splay_tree; +typedef struct splay_tree_key_s *splay_tree_key; + +struct target_mem_desc { + /* Reference count. */ + uintptr_t refcount; + /* All the splay nodes allocated together. */ + splay_tree_node array; + /* Start of the target region. */ + uintptr_t tgt_start; + /* End of the target region. */ + uintptr_t tgt_end; + /* Handle to free. */ + void *to_free; + /* Previous target_mem_desc. */ + struct target_mem_desc *prev; + /* Number of items in following list. */ + size_t list_count; + + /* Corresponding target device descriptor. */ + struct gomp_device_descr *device_descr; + + /* Memory mapping info for the thread that created this descriptor. */ + struct gomp_memory_mapping *mem_map; + + /* List of splay keys to remove (or decrease refcount) + at the end of region. */ + splay_tree_key list[]; +}; + +struct splay_tree_key_s { + /* Address of the host object. */ + uintptr_t host_start; + /* Address immediately after the host object. */ + uintptr_t host_end; + /* Descriptor of the target memory. */ + struct target_mem_desc *tgt; + /* Offset from tgt->tgt_start to the start of the target object. */ + uintptr_t tgt_offset; + /* Reference count. */ + uintptr_t refcount; + /* Asynchronous reference count. */ + uintptr_t async_refcount; + /* True if data should be copied from device to host at the end. */ + bool copy_from; +}; + +#include "splay-tree.h" + +/* Information about mapped memory regions (per device/context). */ + +struct gomp_memory_mapping +{ + /* Mutex for operating with the splay tree and other shared structures. */ + gomp_mutex_t lock; + + /* True when tables have been added to this memory map. 
*/ + bool is_initialized; + + /* Splay tree containing information about mapped memory regions. */ + struct splay_tree_s splay_tree; +}; + +typedef struct acc_dispatch_t +{ + /* This is a linked list of data mapped using the + acc_map_data/acc_unmap_data or "acc enter data"/"acc exit data" pragmas. + Unlike mapped_data in the goacc_thread struct, unmapping can + happen out-of-order with respect to mapping. */ + /* This is guarded by the lock in the "outer" struct gomp_device_descr. */ + struct target_mem_desc *data_environ; + + /* Extra information required for a device instance by a given target. */ + /* This is guarded by the lock in the "outer" struct gomp_device_descr. */ + void *target_data; + + /* Open or close a device instance. */ + void *(*open_device_func) (int n); + int (*close_device_func) (void *h); + + /* Set or get the device number. */ + int (*get_device_num_func) (void); + void (*set_device_num_func) (int); + + /* Execute. */ + void (*exec_func) (void (*) (void *), size_t, void **, void **, size_t *, + unsigned short *, int, int, int, int, void *); + + /* Async cleanup callback registration. */ + void (*register_async_cleanup_func) (void *); + + /* Asynchronous routines. */ + int (*async_test_func) (int); + int (*async_test_all_func) (void); + void (*async_wait_func) (int); + void (*async_wait_async_func) (int, int); + void (*async_wait_all_func) (void); + void (*async_wait_all_async_func) (int); + void (*async_set_async_func) (int); + + /* Create/destroy TLS data. */ + void *(*create_thread_data_func) (void *); + void (*destroy_thread_data_func) (void *); + + /* NVIDIA target specific routines. */ + struct { + void *(*get_current_device_func) (void); + void *(*get_current_context_func) (void); + void *(*get_stream_func) (int); + int (*set_stream_func) (int, void *); + } cuda; +} acc_dispatch_t; + +/* This structure describes accelerator device. 
+ It contains name of the corresponding libgomp plugin, function handlers for + interaction with the device, ID-number of the device, and information about + mapped memory. */ +struct gomp_device_descr +{ + /* Immutable data, which is only set during initialization, and which is not + guarded by the lock. */ + + /* The name of the device. */ + const char *name; + + /* Capabilities of device (supports OpenACC, OpenMP). */ + unsigned int capabilities; + + /* This is the ID number of device among devices of the same type. */ + int target_id; + + /* This is the TYPE of device. */ + enum offload_target_type type; + + /* Function handlers. */ + const char *(*get_name_func) (void); + unsigned int (*get_caps_func) (void); + int (*get_type_func) (void); + int (*get_num_devices_func) (void); + void (*register_image_func) (void *, void *); + void (*init_device_func) (int); + void (*fini_device_func) (int); + int (*get_table_func) (int, struct mapping_table **); + void *(*alloc_func) (int, size_t); + void (*free_func) (int, void *); + void *(*dev2host_func) (int, void *, const void *, size_t); + void *(*host2dev_func) (int, void *, const void *, size_t); + void (*run_func) (int, void *, void *); + + /* Memory-mapping info for this device instance. */ + /* Uses a separate lock. */ + struct gomp_memory_mapping mem_map; + + /* Mutex for the mutable data. */ + gomp_mutex_t lock; + + /* Set to true when device is initialized. */ + bool is_initialized; + + /* True when offload regions have been registered with this device. */ + bool offload_regions_registered; + + /* OpenACC-specific data and functions. */ + /* This is mutable because of its mutable data_environ and target_data + members. 
*/ + acc_dispatch_t openacc; +}; + +extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *); +extern void gomp_acc_remove_pointer (void *, bool, int, int); + +extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *, + size_t, void **, void **, + size_t *, void *, bool, bool); +extern void gomp_copy_from_async (struct target_mem_desc *); +extern void gomp_unmap_vars (struct target_mem_desc *, bool); +extern void gomp_init_device (struct gomp_device_descr *); +extern void gomp_init_tables (struct gomp_device_descr *, + struct gomp_memory_mapping *); +extern void gomp_free_memmap (struct gomp_memory_mapping *); +extern void gomp_fini_device (struct gomp_device_descr *); + /* work.c */ extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned); diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index f36df23e795..f44174e83b2 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -232,3 +232,107 @@ GOMP_4.0.1 { global: GOMP_offload_register; } GOMP_4.0; + +OACC_2.0 { + global: + acc_get_num_devices; + acc_get_num_devices_h_; + acc_set_device_type; + acc_set_device_type_h_; + acc_get_device_type; + acc_get_device_type_h_; + acc_set_device_num; + acc_set_device_num_h_; + acc_get_device_num; + acc_get_device_num_h_; + acc_async_test; + acc_async_test_h_; + acc_async_test_all; + acc_async_test_all_h_; + acc_wait; + acc_wait_h_; + acc_wait_async; + acc_wait_async_h_; + acc_wait_all; + acc_wait_all_h_; + acc_wait_all_async; + acc_wait_all_async_h_; + acc_init; + acc_init_h_; + acc_shutdown; + acc_shutdown_h_; + acc_on_device; + acc_on_device_h_; + acc_malloc; + acc_free; + acc_copyin; + acc_copyin_32_h_; + acc_copyin_64_h_; + acc_copyin_array_h_; + acc_present_or_copyin; + acc_present_or_copyin_32_h_; + acc_present_or_copyin_64_h_; + acc_present_or_copyin_array_h_; + acc_create; + acc_create_32_h_; + acc_create_64_h_; + acc_create_array_h_; + acc_present_or_create; + acc_present_or_create_32_h_; + 
acc_present_or_create_64_h_; + acc_present_or_create_array_h_; + acc_copyout; + acc_copyout_32_h_; + acc_copyout_64_h_; + acc_copyout_array_h_; + acc_delete; + acc_delete_32_h_; + acc_delete_64_h_; + acc_delete_array_h_; + acc_update_device; + acc_update_device_32_h_; + acc_update_device_64_h_; + acc_update_device_array_h_; + acc_update_self; + acc_update_self_32_h_; + acc_update_self_64_h_; + acc_update_self_array_h_; + acc_map_data; + acc_unmap_data; + acc_deviceptr; + acc_hostptr; + acc_is_present; + acc_is_present_32_h_; + acc_is_present_64_h_; + acc_is_present_array_h_; + acc_memcpy_to_device; + acc_memcpy_from_device; + acc_get_current_cuda_device; + acc_get_current_cuda_context; + acc_get_cuda_stream; + acc_set_cuda_stream; +}; + +GOACC_2.0 { + global: + GOACC_data_end; + GOACC_data_start; + GOACC_enter_exit_data; + GOACC_parallel; + GOACC_update; + GOACC_wait; + GOACC_get_thread_num; + GOACC_get_num_threads; +}; + +GOMP_PLUGIN_1.0 { + global: + GOMP_PLUGIN_malloc; + GOMP_PLUGIN_malloc_cleared; + GOMP_PLUGIN_realloc; + GOMP_PLUGIN_debug; + GOMP_PLUGIN_error; + GOMP_PLUGIN_fatal; + GOMP_PLUGIN_async_unmap_vars; + GOMP_PLUGIN_acc_thread; +}; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index b7306f14c6f..6c7f1aed641 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -35,8 +35,9 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) @end direntry This manual documents libgomp, the GNU Offloading and Multi Processing -Runtime library. This is the GNU implementation of the OpenMP API for -multi-platform shared-memory parallel programming in C/C++ and Fortran. +Runtime library. This is the GNU implementation of the OpenMP and +OpenACC APIs for parallel and accelerator programming in C/C++ and +Fortran. 
Published by the Free Software Foundation 51 Franklin Street, Fifth Floor @@ -50,7 +51,7 @@ Boston, MA 02110-1301 USA @titlepage @title GNU Offloading and Multi Processing Runtime Library -@subtitle The GNU OpenMP Implementation +@subtitle The GNU OpenMP and OpenACC Implementation @page @vskip 0pt plus 1filll @comment For the @value{version-GCC} Version* @@ -72,11 +73,18 @@ Boston, MA 02110-1301, USA@* @cindex Introduction This manual documents the usage of libgomp, the GNU Offloading and -Multi Processing Runtime Library. This is the GNU implementation of the -@uref{http://www.openmp.org, OpenMP} Application Programming Interface (API) -for multi-platform shared-memory parallel programming in C/C++ and Fortran. +Multi Processing Runtime Library. This includes the GNU +implementation of the @uref{http://www.openmp.org, OpenMP} Application +Programming Interface (API) for multi-platform shared-memory parallel +programming in C/C++ and Fortran, and the GNU implementation of the +@uref{http://www.openacc.org/, OpenACC} Application Programming +Interface (API) for offloading of code to accelerator devices in C/C++ +and Fortran. -Originally, libgomp was known as the GNU OpenMP Runtime Library. +Originally, libgomp implemented the GNU OpenMP Runtime Library. Based +on this, support for OpenACC and offloading (both OpenACC and OpenMP +4's target construct) has been added later on, and the library's name +changed to GNU Offloading and Multi Processing Runtime Library. @@ -1312,6 +1320,7 @@ beginning with @env{GOMP_} are GNU extensions. 
* OMP_THREAD_LIMIT:: Set the maximum number of threads * OMP_WAIT_POLICY:: How waiting threads are handled * GOMP_CPU_AFFINITY:: Bind threads to specific CPUs +* GOMP_DEBUG:: Enable debugging output * GOMP_STACKSIZE:: Set default thread stack size * GOMP_SPINCOUNT:: Set the busy-wait spin count @end menu @@ -1631,6 +1640,20 @@ If both @env{GOMP_CPU_AFFINITY} and @env{OMP_PROC_BIND} are set, +@node GOMP_DEBUG +@section @env{GOMP_DEBUG} -- Enable debugging output +@cindex Environment Variable +@table @asis +@item @emph{Description}: +Enable debugging output. The variable should be set to @code{0} +(disabled, also the default if not set), or @code{1} (enabled). + +If enabled, some debugging output will be printed during execution. +This is currently not specified in more detail, and subject to change. +@end table + + + @node GOMP_STACKSIZE @section @env{GOMP_STACKSIZE} -- Set default thread stack size @cindex Environment Variable @@ -2080,7 +2103,8 @@ becomes Bugs in the GNU Offloading and Multi Processing Runtime Library should be reported via @uref{http://gcc.gnu.org/bugzilla/, Bugzilla}. Please add -"openmp" to the keywords field in the bug report. +"openacc", or "openmp", or both to the keywords field in the bug +report, as appropriate. 
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index 56a4a97d1d5..c1e4e6367ab 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -215,4 +215,20 @@ extern void GOMP_target_update (int, const void *, size_t, void **, size_t *, unsigned char *); extern void GOMP_teams (unsigned int, unsigned int); +/* oacc-parallel.c */ + +extern void GOACC_data_start (int, const void *, + size_t, void **, size_t *, unsigned short *); +extern void GOACC_data_end (void); +extern void GOACC_enter_exit_data (int, const void *, size_t, void **, + size_t *, unsigned short *, int, int, ...); +extern void GOACC_parallel (int, void (*) (void *), const void *, size_t, + void **, size_t *, unsigned short *, int, int, int, + int, int, ...); +extern void GOACC_update (int, const void *, size_t, void **, size_t *, + unsigned short *, int, int, ...); +extern void GOACC_wait (int, int, ...); +extern int GOACC_get_num_threads (void); +extern int GOACC_get_thread_num (void); + #endif /* LIBGOMP_G_H */ diff --git a/libgomp/oacc-async.c b/libgomp/oacc-async.c new file mode 100644 index 00000000000..08b7c5e1945 --- /dev/null +++ b/libgomp/oacc-async.c @@ -0,0 +1,77 @@ +/* OpenACC Runtime Library Definitions. + + Copyright (C) 2013-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + + +#include "openacc.h" +#include "libgomp.h" +#include "oacc-int.h" + +int +acc_async_test (int async) +{ + if (async < acc_async_sync) + gomp_fatal ("invalid async argument: %d", async); + + return base_dev->openacc.async_test_func (async); +} + +int +acc_async_test_all (void) +{ + return base_dev->openacc.async_test_all_func (); +} + +void +acc_wait (int async) +{ + if (async < acc_async_sync) + gomp_fatal ("invalid async argument: %d", async); + + base_dev->openacc.async_wait_func (async); +} + +void +acc_wait_async (int async1, int async2) +{ + base_dev->openacc.async_wait_async_func (async1, async2); +} + +void +acc_wait_all (void) +{ + base_dev->openacc.async_wait_all_func (); +} + +void +acc_wait_all_async (int async) +{ + if (async < acc_async_sync) + gomp_fatal ("invalid async argument: %d", async); + + base_dev->openacc.async_wait_all_async_func (async); +} diff --git a/libgomp/oacc-cuda.c b/libgomp/oacc-cuda.c new file mode 100644 index 00000000000..c8ef376e3a2 --- /dev/null +++ b/libgomp/oacc-cuda.c @@ -0,0 +1,84 @@ +/* OpenACC Runtime Library: CUDA support glue. + + Copyright (C) 2014-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include "openacc.h" +#include "config.h" +#include "libgomp.h" +#include "oacc-int.h" + +void * +acc_get_current_cuda_device (void) +{ + void *p = NULL; + + if (base_dev && base_dev->openacc.cuda.get_current_device_func) + p = base_dev->openacc.cuda.get_current_device_func (); + + return p; +} + +void * +acc_get_current_cuda_context (void) +{ + void *p = NULL; + + if (base_dev && base_dev->openacc.cuda.get_current_context_func) + p = base_dev->openacc.cuda.get_current_context_func (); + + return p; +} + +void * +acc_get_cuda_stream (int async) +{ + void *p = NULL; + + if (async < 0) + return p; + + if (base_dev && base_dev->openacc.cuda.get_stream_func) + p = base_dev->openacc.cuda.get_stream_func (async); + + return p; +} + +int +acc_set_cuda_stream (int async, void *stream) +{ + int s = -1; + + if (async < 0 || stream == NULL) + return 0; + + goacc_lazy_initialize (); + + if (base_dev && base_dev->openacc.cuda.set_stream_func) + s = base_dev->openacc.cuda.set_stream_func (async, stream); + + return s; +} diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c new file mode 100644 index 00000000000..6aeb1e765d5 --- /dev/null +++ b/libgomp/oacc-host.c @@ -0,0 +1,100 @@ +/* OpenACC Runtime Library: acc_device_host. + + Copyright (C) 2013-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. 
+ + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* This shares much of the implementation of the plugin-host.c "host_nonshm" + plugin. 
*/ +#include "plugin/plugin-host.c" + +static struct gomp_device_descr host_dispatch = + { + .name = "host", + .capabilities = (GOMP_OFFLOAD_CAP_OPENACC_200 + | GOMP_OFFLOAD_CAP_NATIVE_EXEC + | GOMP_OFFLOAD_CAP_SHARED_MEM), + .target_id = 0, + .type = OFFLOAD_TARGET_TYPE_HOST, + + .get_name_func = GOMP_OFFLOAD_get_name, + .get_caps_func = GOMP_OFFLOAD_get_caps, + .get_type_func = GOMP_OFFLOAD_get_type, + .get_num_devices_func = GOMP_OFFLOAD_get_num_devices, + .register_image_func = GOMP_OFFLOAD_register_image, + .init_device_func = GOMP_OFFLOAD_init_device, + .fini_device_func = GOMP_OFFLOAD_fini_device, + .get_table_func = GOMP_OFFLOAD_get_table, + .alloc_func = GOMP_OFFLOAD_alloc, + .free_func = GOMP_OFFLOAD_free, + .dev2host_func = GOMP_OFFLOAD_dev2host, + .host2dev_func = GOMP_OFFLOAD_host2dev, + .run_func = GOMP_OFFLOAD_run, + + .mem_map.is_initialized = false, + .mem_map.splay_tree.root = NULL, + .is_initialized = false, + .offload_regions_registered = false, + + .openacc = { + .open_device_func = GOMP_OFFLOAD_openacc_open_device, + .close_device_func = GOMP_OFFLOAD_openacc_close_device, + + .get_device_num_func = GOMP_OFFLOAD_openacc_get_device_num, + .set_device_num_func = GOMP_OFFLOAD_openacc_set_device_num, + + .exec_func = GOMP_OFFLOAD_openacc_parallel, + + .register_async_cleanup_func + = GOMP_OFFLOAD_openacc_register_async_cleanup, + + .async_set_async_func = GOMP_OFFLOAD_openacc_async_set_async, + .async_test_func = GOMP_OFFLOAD_openacc_async_test, + .async_test_all_func = GOMP_OFFLOAD_openacc_async_test_all, + .async_wait_func = GOMP_OFFLOAD_openacc_async_wait, + .async_wait_async_func = GOMP_OFFLOAD_openacc_async_wait_async, + .async_wait_all_func = GOMP_OFFLOAD_openacc_async_wait_all, + .async_wait_all_async_func = GOMP_OFFLOAD_openacc_async_wait_all_async, + + .create_thread_data_func = GOMP_OFFLOAD_openacc_create_thread_data, + .destroy_thread_data_func = GOMP_OFFLOAD_openacc_destroy_thread_data, + + .cuda = { + .get_current_device_func = NULL, + 
.get_current_context_func = NULL, + .get_stream_func = NULL, + .set_stream_func = NULL, + } + } + }; + +/* Register this device type. */ +static __attribute__ ((constructor)) +void goacc_host_init (void) +{ + gomp_mutex_init (&host_dispatch.mem_map.lock); + gomp_mutex_init (&host_dispatch.lock); + goacc_register (&host_dispatch); +} diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c new file mode 100644 index 00000000000..166eb553a6c --- /dev/null +++ b/libgomp/oacc-init.c @@ -0,0 +1,636 @@ +/* OpenACC Runtime initialization routines + + Copyright (C) 2013-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include "libgomp.h" +#include "oacc-int.h" +#include "openacc.h" +#include +#include +#include +#include +#include + +static gomp_mutex_t acc_device_lock; + +/* The dispatch table for the current accelerator device. This is global, so + you can only have one type of device open at any given time in a program. 
+ This is the "base" device in that several devices that use the same + dispatch table may be active concurrently: this one (the "zeroth") is used + for overall initialisation/shutdown, and other instances -- not necessarily + including this one -- may be opened and closed once the base device has + been initialized. */ +struct gomp_device_descr *base_dev; + +#if defined HAVE_TLS || defined USE_EMUTLS +__thread struct goacc_thread *goacc_tls_data; +#else +pthread_key_t goacc_tls_key; +#endif +static pthread_key_t goacc_cleanup_key; + +/* Current dispatcher, and how it was initialized */ +static acc_device_t init_key = _ACC_device_hwm; + +static struct goacc_thread *goacc_threads; +static gomp_mutex_t goacc_thread_lock; + +/* An array of dispatchers for device types, indexed by the type. This array + only references "base" devices, and other instances of the same type are + found by simply indexing from each such device (which are stored linearly, + grouped by device in target.c:devices). */ +static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 }; + +attribute_hidden void +goacc_register (struct gomp_device_descr *disp) +{ + /* Only register the 0th device here. */ + if (disp->target_id != 0) + return; + + gomp_mutex_lock (&acc_device_lock); + + assert (acc_device_type (disp->type) != acc_device_none + && acc_device_type (disp->type) != acc_device_default + && acc_device_type (disp->type) != acc_device_not_host); + assert (!dispatchers[disp->type]); + dispatchers[disp->type] = disp; + + gomp_mutex_unlock (&acc_device_lock); +} + +/* OpenACC names some things a little differently. */ + +static const char * +get_openacc_name (const char *name) +{ + if (strcmp (name, "nvptx") == 0) + return "nvidia"; + else + return name; +} + +static struct gomp_device_descr * +resolve_device (acc_device_t d) +{ + acc_device_t d_arg = d; + + switch (d) + { + case acc_device_default: + { + if (goacc_device_type) + { + /* Lookup the named device. 
*/ + while (++d != _ACC_device_hwm) + if (dispatchers[d] + && !strcasecmp (goacc_device_type, + get_openacc_name (dispatchers[d]->name)) + && dispatchers[d]->get_num_devices_func () > 0) + goto found; + + gomp_fatal ("device type %s not supported", goacc_device_type); + } + + /* No default device specified, so start scanning for any non-host + device that is available. */ + d = acc_device_not_host; + } + /* FALLTHROUGH */ + + case acc_device_not_host: + /* Find the first available device after acc_device_not_host. */ + while (++d != _ACC_device_hwm) + if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0) + goto found; + if (d_arg == acc_device_default) + { + d = acc_device_host; + goto found; + } + gomp_fatal ("no device found"); + break; + + case acc_device_host: + break; + + default: + if (d > _ACC_device_hwm) + gomp_fatal ("device %u out of range", (unsigned)d); + break; + } + found: + + assert (d != acc_device_none + && d != acc_device_default + && d != acc_device_not_host); + + return dispatchers[d]; +} + +/* This is called when plugins have been initialized, and serves to call + (indirectly) the target's device_init hook. Calling multiple times without + an intervening acc_shutdown_1 call is an error. */ + +static struct gomp_device_descr * +acc_init_1 (acc_device_t d) +{ + struct gomp_device_descr *acc_dev; + + acc_dev = resolve_device (d); + + if (!acc_dev || acc_dev->get_num_devices_func () <= 0) + gomp_fatal ("device %u not supported", (unsigned)d); + + if (acc_dev->is_initialized) + gomp_fatal ("device already active"); + + /* We need to remember what we were intialized as, to check shutdown etc. 
*/ + init_key = d; + + gomp_init_device (acc_dev); + + return acc_dev; +} + +static struct goacc_thread * +goacc_new_thread (void) +{ + struct goacc_thread *thr = gomp_malloc (sizeof (struct gomp_thread)); + +#if defined HAVE_TLS || defined USE_EMUTLS + goacc_tls_data = thr; +#else + pthread_setspecific (goacc_tls_key, thr); +#endif + + pthread_setspecific (goacc_cleanup_key, thr); + + gomp_mutex_lock (&goacc_thread_lock); + thr->next = goacc_threads; + goacc_threads = thr; + gomp_mutex_unlock (&goacc_thread_lock); + + return thr; +} + +static void +goacc_destroy_thread (void *data) +{ + struct goacc_thread *thr = data, *walk, *prev; + + gomp_mutex_lock (&goacc_thread_lock); + + if (thr) + { + if (base_dev && thr->target_tls) + { + base_dev->openacc.destroy_thread_data_func (thr->target_tls); + thr->target_tls = NULL; + } + + assert (!thr->mapped_data); + + /* Remove from thread list. */ + for (prev = NULL, walk = goacc_threads; walk; + prev = walk, walk = walk->next) + if (walk == thr) + { + if (prev == NULL) + goacc_threads = walk->next; + else + prev->next = walk->next; + + free (thr); + + break; + } + + assert (walk); + } + + gomp_mutex_unlock (&goacc_thread_lock); +} + +/* Open the ORD'th device of the currently-active type (base_dev must be + initialised before calling). If ORD is < 0, open the default-numbered + device (set by the ACC_DEVICE_NUM environment variable or a call to + acc_set_device_num), or leave any currently-opened device as is. "Opening" + consists of calling the device's open_device_func hook, and setting up + thread-local data (maybe allocating, then initializing with information + pertaining to the newly-opened or previously-opened device). 
*/ + +static void +lazy_open (int ord) +{ + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev; + + if (thr && thr->dev) + { + assert (ord < 0 || ord == thr->dev->target_id); + return; + } + + assert (base_dev); + + if (ord < 0) + ord = goacc_device_num; + + /* The OpenACC 2.0 spec leaves the runtime's behaviour when an out-of-range + device is requested as implementation-defined (4.2 ACC_DEVICE_NUM). + We choose to raise an error in such a case. */ + if (ord >= base_dev->get_num_devices_func ()) + gomp_fatal ("device %u does not exist", ord); + + if (!thr) + thr = goacc_new_thread (); + + acc_dev = thr->dev = &base_dev[ord]; + + assert (acc_dev->target_id == ord); + + thr->saved_bound_dev = NULL; + thr->mapped_data = NULL; + + if (!acc_dev->openacc.target_data) + acc_dev->openacc.target_data = acc_dev->openacc.open_device_func (ord); + + thr->target_tls + = acc_dev->openacc.create_thread_data_func (acc_dev->openacc.target_data); + + acc_dev->openacc.async_set_async_func (acc_async_sync); + + struct gomp_memory_mapping *mem_map = &acc_dev->mem_map; + gomp_mutex_lock (&mem_map->lock); + if (!mem_map->is_initialized) + gomp_init_tables (acc_dev, mem_map); + gomp_mutex_unlock (&mem_map->lock); +} + +/* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of + init/shutdown is per-process or per-thread. We choose per-process. */ + +void +acc_init (acc_device_t d) +{ + if (!base_dev) + gomp_init_targets_once (); + + gomp_mutex_lock (&acc_device_lock); + + base_dev = acc_init_1 (d); + + lazy_open (-1); + + gomp_mutex_unlock (&acc_device_lock); +} + +ialias (acc_init) + +static void +acc_shutdown_1 (acc_device_t d) +{ + struct goacc_thread *walk; + + /* We don't check whether d matches the actual device found, because + OpenACC 2.0 (3.2.12) says the parameters to the init and this + call must match (for the shutdown call anyway, it's silent on + others). 
*/ + + if (!base_dev) + gomp_fatal ("no device initialized"); + if (d != init_key) + gomp_fatal ("device %u(%u) is initialized", + (unsigned) init_key, (unsigned) base_dev->type); + + gomp_mutex_lock (&goacc_thread_lock); + + /* Free target-specific TLS data and close all devices. */ + for (walk = goacc_threads; walk != NULL; walk = walk->next) + { + if (walk->target_tls) + base_dev->openacc.destroy_thread_data_func (walk->target_tls); + + walk->target_tls = NULL; + + /* This would mean the user is shutting down OpenACC in the middle of an + "acc data" pragma. Likely not intentional. */ + if (walk->mapped_data) + gomp_fatal ("shutdown in 'acc data' region"); + + if (walk->dev) + { + void *target_data = walk->dev->openacc.target_data; + if (walk->dev->openacc.close_device_func (target_data) < 0) + gomp_fatal ("failed to close device"); + + walk->dev->openacc.target_data = target_data = NULL; + + struct gomp_memory_mapping *mem_map = &walk->dev->mem_map; + gomp_mutex_lock (&mem_map->lock); + gomp_free_memmap (mem_map); + gomp_mutex_unlock (&mem_map->lock); + + walk->dev = NULL; + } + } + + gomp_mutex_unlock (&goacc_thread_lock); + + gomp_fini_device (base_dev); + + base_dev = NULL; +} + +void +acc_shutdown (acc_device_t d) +{ + gomp_mutex_lock (&acc_device_lock); + + acc_shutdown_1 (d); + + gomp_mutex_unlock (&acc_device_lock); +} + +ialias (acc_shutdown) + +/* This function is called after plugins have been initialized. It deals with + the "base" device, and is used to prepare the runtime for dealing with a + number of such devices (as implemented by some particular plugin). If the + argument device type D matches a previous call to the function, return the + current base device, else shut the old device down and re-initialize with + the new device type. */ + +static struct gomp_device_descr * +lazy_init (acc_device_t d) +{ + if (base_dev) + { + /* Re-initializing the same device, do nothing. 
*/ + if (d == init_key) + return base_dev; + + acc_shutdown_1 (init_key); + } + + assert (!base_dev); + + return acc_init_1 (d); +} + +/* Ensure that plugins are loaded, initialize and open the (default-numbered) + device. */ + +static void +lazy_init_and_open (acc_device_t d) +{ + if (!base_dev) + gomp_init_targets_once (); + + gomp_mutex_lock (&acc_device_lock); + + base_dev = lazy_init (d); + + lazy_open (-1); + + gomp_mutex_unlock (&acc_device_lock); +} + +int +acc_get_num_devices (acc_device_t d) +{ + int n = 0; + const struct gomp_device_descr *acc_dev; + + if (d == acc_device_none) + return 0; + + if (!base_dev) + gomp_init_targets_once (); + + acc_dev = resolve_device (d); + if (!acc_dev) + return 0; + + n = acc_dev->get_num_devices_func (); + if (n < 0) + n = 0; + + return n; +} + +ialias (acc_get_num_devices) + +void +acc_set_device_type (acc_device_t d) +{ + lazy_init_and_open (d); +} + +ialias (acc_set_device_type) + +acc_device_t +acc_get_device_type (void) +{ + acc_device_t res = acc_device_none; + const struct gomp_device_descr *dev; + + if (base_dev) + res = acc_device_type (base_dev->type); + else + { + gomp_init_targets_once (); + + dev = resolve_device (acc_device_default); + res = acc_device_type (dev->type); + } + + assert (res != acc_device_default + && res != acc_device_not_host); + + return res; +} + +ialias (acc_get_device_type) + +int +acc_get_device_num (acc_device_t d) +{ + const struct gomp_device_descr *dev; + int num; + + if (d >= _ACC_device_hwm) + gomp_fatal ("device %u out of range", (unsigned)d); + + if (!base_dev) + gomp_init_targets_once (); + + dev = resolve_device (d); + if (!dev) + gomp_fatal ("no devices of type %u", d); + + /* We might not have called lazy_open for this host thread yet, in which case + the get_device_num_func hook will return -1. 
*/ + num = dev->openacc.get_device_num_func (); + if (num < 0) + num = goacc_device_num; + + return num; +} + +ialias (acc_get_device_num) + +void +acc_set_device_num (int n, acc_device_t d) +{ + const struct gomp_device_descr *dev; + int num_devices; + + if (!base_dev) + gomp_init_targets_once (); + + if ((int) d == 0) + { + int i; + + /* A device setting of zero sets all device types on the system to use + the Nth instance of that device type. Only attempt it for initialized + devices though. */ + for (i = acc_device_not_host + 1; i < _ACC_device_hwm; i++) + { + dev = resolve_device (d); + if (dev && dev->is_initialized) + dev->openacc.set_device_num_func (n); + } + + /* ...and for future calls to acc_init/acc_set_device_type, etc. */ + goacc_device_num = n; + } + else + { + struct goacc_thread *thr = goacc_thread (); + + gomp_mutex_lock (&acc_device_lock); + + base_dev = lazy_init (d); + + num_devices = base_dev->get_num_devices_func (); + + if (n >= num_devices) + gomp_fatal ("device %u out of range", n); + + /* If we're changing the device number, de-associate this thread with + the device (but don't close the device, since it may be in use by + other threads). */ + if (thr && thr->dev && n != thr->dev->target_id) + thr->dev = NULL; + + lazy_open (n); + + gomp_mutex_unlock (&acc_device_lock); + } +} + +ialias (acc_set_device_num) + +int +acc_on_device (acc_device_t dev) +{ + struct goacc_thread *thr = goacc_thread (); + + if (thr && thr->dev + && acc_device_type (thr->dev->type) == acc_device_host_nonshm) + return dev == acc_device_host_nonshm || dev == acc_device_not_host; + + /* Just rely on the compiler builtin. 
*/ + return __builtin_acc_on_device (dev); +} + +ialias (acc_on_device) + +attribute_hidden void +goacc_runtime_initialize (void) +{ + gomp_mutex_init (&acc_device_lock); + +#if !(defined HAVE_TLS || defined USE_EMUTLS) + pthread_key_create (&goacc_tls_key, NULL); +#endif + + pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread); + + base_dev = NULL; + + goacc_threads = NULL; + gomp_mutex_init (&goacc_thread_lock); +} + +/* Compiler helper functions */ + +attribute_hidden void +goacc_save_and_set_bind (acc_device_t d) +{ + struct goacc_thread *thr = goacc_thread (); + + assert (!thr->saved_bound_dev); + + thr->saved_bound_dev = thr->dev; + thr->dev = dispatchers[d]; +} + +attribute_hidden void +goacc_restore_bind (void) +{ + struct goacc_thread *thr = goacc_thread (); + + thr->dev = thr->saved_bound_dev; + thr->saved_bound_dev = NULL; +} + +/* This is called from any OpenACC support function that may need to implicitly + initialize the libgomp runtime. On exit all such initialization will have + been done, and both the global ACC_dev and the per-host-thread ACC_memmap + pointers will be valid. */ + +attribute_hidden void +goacc_lazy_initialize (void) +{ + struct goacc_thread *thr = goacc_thread (); + + if (thr && thr->dev) + return; + + if (!base_dev) + lazy_init_and_open (acc_device_default); + else + { + gomp_mutex_lock (&acc_device_lock); + lazy_open (-1); + gomp_mutex_unlock (&acc_device_lock); + } +} diff --git a/libgomp/oacc-int.h b/libgomp/oacc-int.h new file mode 100644 index 00000000000..85619c8d10a --- /dev/null +++ b/libgomp/oacc-int.h @@ -0,0 +1,105 @@ +/* OpenACC Runtime - internal declarations + + Copyright (C) 2013-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). 
+ + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* This file contains data types and function declarations that are not + part of the official OpenACC user interface. There are declarations + in here that are part of the GNU OpenACC ABI, in that the compiler is + required to know about them and use them. + + The convention is that the all caps prefix "GOACC" is used group items + that are part of the external ABI, and the lower case prefix "goacc" + is used group items that are completely private to the library. */ + +#ifndef OACC_INT_H +#define OACC_INT_H 1 + +#include "openacc.h" +#include "config.h" +#include +#include +#include + +#ifdef HAVE_ATTRIBUTE_VISIBILITY +# pragma GCC visibility push(hidden) +#endif + +static inline enum acc_device_t +acc_device_type (enum offload_target_type type) +{ + return (enum acc_device_t) type; +} + +struct goacc_thread +{ + /* The device for the current thread. */ + struct gomp_device_descr *dev; + + struct gomp_device_descr *saved_bound_dev; + + /* This is a linked list of data mapped by the "acc data" pragma, following + strictly push/pop semantics according to lexical scope. 
*/ + struct target_mem_desc *mapped_data; + + /* These structures form a list: this is the next thread in that list. */ + struct goacc_thread *next; + + /* Target-specific data (used by plugin). */ + void *target_tls; +}; + +#if defined HAVE_TLS || defined USE_EMUTLS +extern __thread struct goacc_thread *goacc_tls_data; +static inline struct goacc_thread * +goacc_thread (void) +{ + return goacc_tls_data; +} +#else +extern pthread_key_t goacc_tls_key; +static inline struct goacc_thread * +goacc_thread (void) +{ + return pthread_getspecific (goacc_tls_key); +} +#endif + +void goacc_register (struct gomp_device_descr *) __GOACC_NOTHROW; + +/* Current dispatcher. */ +extern struct gomp_device_descr *base_dev; + +void goacc_runtime_initialize (void); +void goacc_save_and_set_bind (acc_device_t); +void goacc_restore_bind (void); +void goacc_lazy_initialize (void); + +#ifdef HAVE_ATTRIBUTE_VISIBILITY +# pragma GCC visibility pop +#endif + +#endif diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c new file mode 100644 index 00000000000..0096d514297 --- /dev/null +++ b/libgomp/oacc-mem.c @@ -0,0 +1,585 @@ +/* OpenACC Runtime initialization routines + + Copyright (C) 2013-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include "openacc.h" +#include "config.h" +#include "libgomp.h" +#include "gomp-constants.h" +#include "oacc-int.h" +#include "splay-tree.h" +#include +#include + +/* Return block containing [H->S), or NULL if not contained. */ + +static splay_tree_key +lookup_host (struct gomp_memory_mapping *mem_map, void *h, size_t s) +{ + struct splay_tree_key_s node; + splay_tree_key key; + + node.host_start = (uintptr_t) h; + node.host_end = (uintptr_t) h + s; + + gomp_mutex_lock (&mem_map->lock); + + key = splay_tree_lookup (&mem_map->splay_tree, &node); + + gomp_mutex_unlock (&mem_map->lock); + + return key; +} + +/* Return block containing [D->S), or NULL if not contained. + The list isn't ordered by device address, so we have to iterate + over the whole array. This is not expected to be a common + operation. */ + +static splay_tree_key +lookup_dev (struct target_mem_desc *tgt, void *d, size_t s) +{ + int i; + struct target_mem_desc *t; + struct gomp_memory_mapping *mem_map; + + if (!tgt) + return NULL; + + mem_map = tgt->mem_map; + + gomp_mutex_lock (&mem_map->lock); + + for (t = tgt; t != NULL; t = t->prev) + { + if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s) + break; + } + + gomp_mutex_unlock (&mem_map->lock); + + if (!t) + return NULL; + + for (i = 0; i < t->list_count; i++) + { + void * offset; + + splay_tree_key k = &t->array[i].key; + offset = d - t->tgt_start + k->tgt_offset; + + if (k->host_start + offset <= (void *) k->host_end) + return k; + } + + return NULL; +} + +/* OpenACC is silent on how memory exhaustion is indicated. We return + NULL. 
*/ + +void * +acc_malloc (size_t s) +{ + if (!s) + return NULL; + + goacc_lazy_initialize (); + + struct goacc_thread *thr = goacc_thread (); + + return base_dev->alloc_func (thr->dev->target_id, s); +} + +/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event + the device address is mapped. We choose to check if it mapped, + and if it is, to unmap it. */ +void +acc_free (void *d) +{ + splay_tree_key k; + struct goacc_thread *thr = goacc_thread (); + + if (!d) + return; + + /* We don't have to call lazy open here, as the ptr value must have + been returned by acc_malloc. It's not permitted to pass NULL in + (unless you got that null from acc_malloc). */ + if ((k = lookup_dev (thr->dev->openacc.data_environ, d, 1))) + { + void *offset; + + offset = d - k->tgt->tgt_start + k->tgt_offset; + + acc_unmap_data ((void *)(k->host_start + offset)); + } + + base_dev->free_func (thr->dev->target_id, d); +} + +void +acc_memcpy_to_device (void *d, void *h, size_t s) +{ + /* No need to call lazy open here, as the device pointer must have + been obtained from a routine that did that. */ + struct goacc_thread *thr = goacc_thread (); + + base_dev->host2dev_func (thr->dev->target_id, d, h, s); +} + +void +acc_memcpy_from_device (void *h, void *d, size_t s) +{ + /* No need to call lazy open here, as the device pointer must have + been obtained from a routine that did that. */ + struct goacc_thread *thr = goacc_thread (); + + base_dev->dev2host_func (thr->dev->target_id, h, d, s); +} + +/* Return the device pointer that corresponds to host data H. Or NULL + if no mapping. */ + +void * +acc_deviceptr (void *h) +{ + splay_tree_key n; + void *d; + void *offset; + + goacc_lazy_initialize (); + + struct goacc_thread *thr = goacc_thread (); + + n = lookup_host (&thr->dev->mem_map, h, 1); + + if (!n) + return NULL; + + offset = h - n->host_start; + + d = n->tgt->tgt_start + n->tgt_offset + offset; + + return d; +} + +/* Return the host pointer that corresponds to device data D. 
Or NULL + if no mapping. */ + +void * +acc_hostptr (void *d) +{ + splay_tree_key n; + void *h; + void *offset; + + goacc_lazy_initialize (); + + struct goacc_thread *thr = goacc_thread (); + + n = lookup_dev (thr->dev->openacc.data_environ, d, 1); + + if (!n) + return NULL; + + offset = d - n->tgt->tgt_start + n->tgt_offset; + + h = n->host_start + offset; + + return h; +} + +/* Return 1 if host data [H,+S] is present on the device. */ + +int +acc_is_present (void *h, size_t s) +{ + splay_tree_key n; + + if (!s || !h) + return 0; + + goacc_lazy_initialize (); + + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + + n = lookup_host (&acc_dev->mem_map, h, s); + + if (n && ((uintptr_t)h < n->host_start + || (uintptr_t)h + s > n->host_end + || s > n->host_end - n->host_start)) + n = NULL; + + return n != NULL; +} + +/* Create a mapping for host [H,+S] -> device [D,+S] */ + +void +acc_map_data (void *h, void *d, size_t s) +{ + struct target_mem_desc *tgt; + size_t mapnum = 1; + void *hostaddrs = h; + void *devaddrs = d; + size_t sizes = s; + unsigned short kinds = GOMP_MAP_ALLOC; + + goacc_lazy_initialize (); + + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + + if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + { + if (d != h) + gomp_fatal ("cannot map data on shared-memory system"); + + tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false); + } + else + { + struct goacc_thread *thr = goacc_thread (); + + if (!d || !h || !s) + gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map", + (void *)h, (int)s, (void *)d, (int)s); + + if (lookup_host (&acc_dev->mem_map, h, s)) + gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h, + (int)s); + + if (lookup_dev (thr->dev->openacc.data_environ, d, s)) + gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d, + (int)s); + + tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, + &kinds, 
true, false); + } + + tgt->prev = acc_dev->openacc.data_environ; + acc_dev->openacc.data_environ = tgt; +} + +void +acc_unmap_data (void *h) +{ + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + + /* No need to call lazy open, as the address must have been mapped. */ + + size_t host_size; + splay_tree_key n = lookup_host (&acc_dev->mem_map, h, 1); + struct target_mem_desc *t; + + if (!n) + gomp_fatal ("%p is not a mapped block", (void *)h); + + host_size = n->host_end - n->host_start; + + if (n->host_start != (uintptr_t) h) + gomp_fatal ("[%p,%d] surrounds1 %p", + (void *) n->host_start, (int) host_size, (void *) h); + + t = n->tgt; + + if (t->refcount == 2) + { + struct target_mem_desc *tp; + + /* This is the last reference, so pull the descriptor off the + chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from + freeing the device memory. */ + t->tgt_end = 0; + t->to_free = 0; + + gomp_mutex_lock (&acc_dev->mem_map.lock); + + for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; + tp = t, t = t->prev) + if (n->tgt == t) + { + if (tp) + tp->prev = t->prev; + else + acc_dev->openacc.data_environ = t->prev; + + break; + } + + gomp_mutex_unlock (&acc_dev->mem_map.lock); + } + + gomp_unmap_vars (t, true); +} + +#define FLAG_PRESENT (1 << 0) +#define FLAG_CREATE (1 << 1) +#define FLAG_COPY (1 << 2) + +static void * +present_create_copy (unsigned f, void *h, size_t s) +{ + void *d; + splay_tree_key n; + + if (!h || !s) + gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s); + + goacc_lazy_initialize (); + + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + + n = lookup_host (&acc_dev->mem_map, h, s); + if (n) + { + /* Present. 
*/ + d = (void *) (n->tgt->tgt_start + n->tgt_offset); + + if (!(f & FLAG_PRESENT)) + gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]", + (void *)h, (int)s, (void *)d, (int)s); + if ((h + s) > (void *)n->host_end) + gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); + } + else if (!(f & FLAG_CREATE)) + { + gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); + } + else + { + struct target_mem_desc *tgt; + size_t mapnum = 1; + unsigned short kinds; + void *hostaddrs = h; + + if (f & FLAG_COPY) + kinds = GOMP_MAP_TO; + else + kinds = GOMP_MAP_ALLOC; + + tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true, + false); + + gomp_mutex_lock (&acc_dev->mem_map.lock); + + d = tgt->to_free; + tgt->prev = acc_dev->openacc.data_environ; + acc_dev->openacc.data_environ = tgt; + + gomp_mutex_unlock (&acc_dev->mem_map.lock); + } + + return d; +} + +void * +acc_create (void *h, size_t s) +{ + return present_create_copy (FLAG_CREATE, h, s); +} + +void * +acc_copyin (void *h, size_t s) +{ + return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s); +} + +void * +acc_present_or_create (void *h, size_t s) +{ + return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s); +} + +void * +acc_present_or_copyin (void *h, size_t s) +{ + return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s); +} + +#define FLAG_COPYOUT (1 << 0) + +static void +delete_copyout (unsigned f, void *h, size_t s) +{ + size_t host_size; + splay_tree_key n; + void *d; + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + + n = lookup_host (&acc_dev->mem_map, h, s); + + /* No need to call lazy open, as the data must already have been + mapped. 
*/ + + if (!n) + gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s); + + d = (void *) (n->tgt->tgt_start + n->tgt_offset); + + host_size = n->host_end - n->host_start; + + if (n->host_start != (uintptr_t) h || host_size != s) + gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]", + (void *) n->host_start, (int) host_size, (void *) h, (int) s); + + if (f & FLAG_COPYOUT) + acc_dev->dev2host_func (acc_dev->target_id, h, d, s); + + acc_unmap_data (h); + + acc_dev->free_func (acc_dev->target_id, d); +} + +void +acc_delete (void *h , size_t s) +{ + delete_copyout (0, h, s); +} + +void acc_copyout (void *h, size_t s) +{ + delete_copyout (FLAG_COPYOUT, h, s); +} + +static void +update_dev_host (int is_dev, void *h, size_t s) +{ + splay_tree_key n; + void *d; + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + + n = lookup_host (&acc_dev->mem_map, h, s); + + /* No need to call lazy open, as the data must already have been + mapped. */ + + if (!n) + gomp_fatal ("[%p,%d] is not mapped", h, (int)s); + + d = (void *) (n->tgt->tgt_start + n->tgt_offset); + + if (is_dev) + acc_dev->host2dev_func (acc_dev->target_id, d, h, s); + else + acc_dev->dev2host_func (acc_dev->target_id, h, d, s); +} + +void +acc_update_device (void *h, size_t s) +{ + update_dev_host (1, h, s); +} + +void +acc_update_self (void *h, size_t s) +{ + update_dev_host (0, h, s); +} + +void +gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, + void *kinds) +{ + struct target_mem_desc *tgt; + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + + gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); + tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, + NULL, sizes, kinds, true, false); + gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); + tgt->prev = acc_dev->openacc.data_environ; + acc_dev->openacc.data_environ = tgt; +} + +void +gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int 
mapnum) +{ + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + splay_tree_key n; + struct target_mem_desc *t; + int minrefs = (mapnum == 1) ? 2 : 3; + + n = lookup_host (&acc_dev->mem_map, h, 1); + + if (!n) + gomp_fatal ("%p is not a mapped block", (void *)h); + + gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); + + t = n->tgt; + + struct target_mem_desc *tp; + + gomp_mutex_lock (&acc_dev->mem_map.lock); + + if (t->refcount == minrefs) + { + /* This is the last reference, so pull the descriptor off the + chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from + freeing the device memory. */ + t->tgt_end = 0; + t->to_free = 0; + + for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; + tp = t, t = t->prev) + { + if (n->tgt == t) + { + if (tp) + tp->prev = t->prev; + else + acc_dev->openacc.data_environ = t->prev; + break; + } + } + } + + if (force_copyfrom) + t->list[0]->copy_from = 1; + + gomp_mutex_unlock (&acc_dev->mem_map.lock); + + /* If running synchronously, unmap immediately. */ + if (async < acc_async_noval) + gomp_unmap_vars (t, true); + else + { + gomp_copy_from_async (t); + acc_dev->openacc.register_async_cleanup_func (t); + } + + gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); +} diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c new file mode 100644 index 00000000000..6d5386b1f94 --- /dev/null +++ b/libgomp/oacc-parallel.c @@ -0,0 +1,490 @@ +/* Copyright (C) 2013-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* This file handles OpenACC constructs. */ + +#include "openacc.h" +#include "libgomp.h" +#include "libgomp_g.h" +#include "gomp-constants.h" +#include "oacc-int.h" +#include +#include +#include +#include + +static int +find_pset (int pos, size_t mapnum, unsigned short *kinds) +{ + if (pos + 1 >= mapnum) + return 0; + + unsigned char kind = kinds[pos+1] & 0xff; + + return kind == GOMP_MAP_TO_PSET; +} + + +/* Ensure that the target device for DEVICE_TYPE is initialised (and that + plugins have been loaded if appropriate). The ACC_dev variable for the + current thread will be set appropriately for the given device type on + return. */ + +attribute_hidden void +select_acc_device (int device_type) +{ + goacc_lazy_initialize (); + + if (device_type == GOMP_DEVICE_HOST_FALLBACK) + return; + + if (device_type == acc_device_none) + device_type = acc_device_host; + + if (device_type >= 0) + { + /* NOTE: this will go badly if the surrounding data environment is set up + to use a different device type. We'll just have to trust that users + know what they're doing... 
*/ + acc_set_device_type (device_type); + } +} + +static void goacc_wait (int async, int num_waits, va_list ap); + +void +GOACC_parallel (int device, void (*fn) (void *), const void *offload_table, + size_t mapnum, void **hostaddrs, size_t *sizes, + unsigned short *kinds, + int num_gangs, int num_workers, int vector_length, + int async, int num_waits, ...) +{ + bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; + va_list ap; + struct goacc_thread *thr; + struct gomp_device_descr *acc_dev; + struct target_mem_desc *tgt; + void **devaddrs; + unsigned int i; + struct splay_tree_key_s k; + splay_tree_key tgt_fn_key; + void (*tgt_fn); + + if (num_gangs != 1) + gomp_fatal ("num_gangs (%d) different from one is not yet supported", + num_gangs); + if (num_workers != 1) + gomp_fatal ("num_workers (%d) different from one is not yet supported", + num_workers); + + gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n", + __FUNCTION__, mapnum, hostaddrs, sizes, kinds, async); + + select_acc_device (device); + + thr = goacc_thread (); + acc_dev = thr->dev; + + /* Host fallback if "if" clause is false or if the current device is set to + the host. 
*/ + if (host_fallback) + { + goacc_save_and_set_bind (acc_device_host); + fn (hostaddrs); + goacc_restore_bind (); + return; + } + else if (acc_device_type (acc_dev->type) == acc_device_host) + { + fn (hostaddrs); + return; + } + + va_start (ap, num_waits); + + if (num_waits > 0) + goacc_wait (async, num_waits, ap); + + va_end (ap); + + acc_dev->openacc.async_set_async_func (async); + + if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) + { + k.host_start = (uintptr_t) fn; + k.host_end = k.host_start + 1; + gomp_mutex_lock (&acc_dev->mem_map.lock); + tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map.splay_tree, &k); + gomp_mutex_unlock (&acc_dev->mem_map.lock); + + if (tgt_fn_key == NULL) + gomp_fatal ("target function wasn't mapped"); + + tgt_fn = (void (*)) tgt_fn_key->tgt->tgt_start; + } + else + tgt_fn = (void (*)) fn; + + tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, + false); + + devaddrs = alloca (sizeof (void *) * mapnum); + for (i = 0; i < mapnum; i++) + devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start + + tgt->list[i]->tgt_offset); + + acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds, + num_gangs, num_workers, vector_length, async, + tgt); + + /* If running synchronously, unmap immediately. 
*/ + if (async < acc_async_noval) + gomp_unmap_vars (tgt, true); + else + { + gomp_copy_from_async (tgt); + acc_dev->openacc.register_async_cleanup_func (tgt); + } + + acc_dev->openacc.async_set_async_func (acc_async_sync); +} + +void +GOACC_data_start (int device, const void *offload_table, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds) +{ + bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; + struct target_mem_desc *tgt; + + gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n", + __FUNCTION__, mapnum, hostaddrs, sizes, kinds); + + select_acc_device (device); + + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + + /* Host fallback or 'do nothing'. */ + if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + || host_fallback) + { + tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false); + tgt->prev = thr->mapped_data; + thr->mapped_data = tgt; + + return; + } + + gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); + tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, + false); + gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); + tgt->prev = thr->mapped_data; + thr->mapped_data = tgt; +} + +void +GOACC_data_end (void) +{ + struct goacc_thread *thr = goacc_thread (); + struct target_mem_desc *tgt = thr->mapped_data; + + gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); + thr->mapped_data = tgt->prev; + gomp_unmap_vars (tgt, true); + gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); +} + +void +GOACC_enter_exit_data (int device, const void *offload_table, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds, + int async, int num_waits, ...) 
+{ + struct goacc_thread *thr; + struct gomp_device_descr *acc_dev; + bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; + bool data_enter = false; + size_t i; + + select_acc_device (device); + + thr = goacc_thread (); + acc_dev = thr->dev; + + if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + || host_fallback) + return; + + if (num_waits > 0) + { + va_list ap; + + va_start (ap, num_waits); + + goacc_wait (async, num_waits, ap); + + va_end (ap); + } + + acc_dev->openacc.async_set_async_func (async); + + /* Determine if this is an "acc enter data". */ + for (i = 0; i < mapnum; ++i) + { + unsigned char kind = kinds[i] & 0xff; + + if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) + continue; + + if (kind == GOMP_MAP_FORCE_ALLOC + || kind == GOMP_MAP_FORCE_PRESENT + || kind == GOMP_MAP_FORCE_TO) + { + data_enter = true; + break; + } + + if (kind == GOMP_MAP_FORCE_DEALLOC + || kind == GOMP_MAP_FORCE_FROM) + break; + + gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", + kind); + } + + if (data_enter) + { + for (i = 0; i < mapnum; i++) + { + unsigned char kind = kinds[i] & 0xff; + + /* Scan for PSETs. */ + int psets = find_pset (i, mapnum, kinds); + + if (!psets) + { + switch (kind) + { + case GOMP_MAP_POINTER: + gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i], + &kinds[i]); + break; + case GOMP_MAP_FORCE_ALLOC: + acc_create (hostaddrs[i], sizes[i]); + break; + case GOMP_MAP_FORCE_PRESENT: + acc_present_or_copyin (hostaddrs[i], sizes[i]); + break; + case GOMP_MAP_FORCE_TO: + acc_present_or_copyin (hostaddrs[i], sizes[i]); + break; + default: + gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", + kind); + break; + } + } + else + { + gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]); + /* Increment 'i' by two because OpenACC requires fortran + arrays to be contiguous, so each PSET is associated with + one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and + one MAP_POINTER. 
*/ + i += 2; + } + } + } + else + for (i = 0; i < mapnum; ++i) + { + unsigned char kind = kinds[i] & 0xff; + + int psets = find_pset (i, mapnum, kinds); + + if (!psets) + { + switch (kind) + { + case GOMP_MAP_POINTER: + gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) + == GOMP_MAP_FORCE_FROM, + async, 1); + break; + case GOMP_MAP_FORCE_DEALLOC: + acc_delete (hostaddrs[i], sizes[i]); + break; + case GOMP_MAP_FORCE_FROM: + acc_copyout (hostaddrs[i], sizes[i]); + break; + default: + gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", + kind); + break; + } + } + else + { + gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) + == GOMP_MAP_FORCE_FROM, async, 3); + /* See the above comment. */ + i += 2; + } + } + + acc_dev->openacc.async_set_async_func (acc_async_sync); +} + +static void +goacc_wait (int async, int num_waits, va_list ap) +{ + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + int i; + + assert (num_waits >= 0); + + if (async == acc_async_sync && num_waits == 0) + { + acc_wait_all (); + return; + } + + if (async == acc_async_sync && num_waits) + { + for (i = 0; i < num_waits; i++) + { + int qid = va_arg (ap, int); + + if (acc_async_test (qid)) + continue; + + acc_wait (qid); + } + return; + } + + if (async == acc_async_noval && num_waits == 0) + { + acc_dev->openacc.async_wait_all_async_func (acc_async_noval); + return; + } + + for (i = 0; i < num_waits; i++) + { + int qid = va_arg (ap, int); + + if (acc_async_test (qid)) + continue; + + /* If we're waiting on the same asynchronous queue as we're launching on, + the queue itself will order work as required, so there's no need to + wait explicitly. */ + if (qid != async) + acc_dev->openacc.async_wait_async_func (qid, async); + } +} + +void +GOACC_update (int device, const void *offload_table, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds, + int async, int num_waits, ...) 
+{ + bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; + size_t i; + + select_acc_device (device); + + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + + if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + || host_fallback) + return; + + if (num_waits > 0) + { + va_list ap; + + va_start (ap, num_waits); + + goacc_wait (async, num_waits, ap); + + va_end (ap); + } + + acc_dev->openacc.async_set_async_func (async); + + for (i = 0; i < mapnum; ++i) + { + unsigned char kind = kinds[i] & 0xff; + + switch (kind) + { + case GOMP_MAP_POINTER: + case GOMP_MAP_TO_PSET: + break; + + case GOMP_MAP_FORCE_TO: + acc_update_device (hostaddrs[i], sizes[i]); + break; + + case GOMP_MAP_FORCE_FROM: + acc_update_self (hostaddrs[i], sizes[i]); + break; + + default: + gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind); + break; + } + } + + acc_dev->openacc.async_set_async_func (acc_async_sync); +} + +void +GOACC_wait (int async, int num_waits, ...) +{ + va_list ap; + + va_start (ap, num_waits); + + goacc_wait (async, num_waits, ap); + + va_end (ap); +} + +int +GOACC_get_num_threads (void) +{ + return 1; +} + +int +GOACC_get_thread_num (void) +{ + return 0; +} diff --git a/libgomp/oacc-plugin.c b/libgomp/oacc-plugin.c new file mode 100644 index 00000000000..44cd6d68e33 --- /dev/null +++ b/libgomp/oacc-plugin.c @@ -0,0 +1,48 @@ +/* Copyright (C) 2014-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* Initialize and register OpenACC dispatch table from libgomp plugin. */ + +#include "libgomp.h" +#include "oacc-plugin.h" +#include "oacc-int.h" + +void +GOMP_PLUGIN_async_unmap_vars (void *ptr) +{ + struct target_mem_desc *tgt = ptr; + + gomp_unmap_vars (tgt, false); +} + +/* Return the target-specific part of the TLS data for the current thread. */ + +void * +GOMP_PLUGIN_acc_thread (void) +{ + struct goacc_thread *thr = goacc_thread (); + return thr ? thr->target_tls : NULL; +} diff --git a/libgomp/libgomp_target.h b/libgomp/oacc-plugin.h similarity index 71% rename from libgomp/libgomp_target.h rename to libgomp/oacc-plugin.h index 2e18a64b164..c60eb9c5ddd 100644 --- a/libgomp/libgomp_target.h +++ b/libgomp/oacc-plugin.h @@ -1,5 +1,7 @@ /* Copyright (C) 2014-2015 Free Software Foundation, Inc. + Contributed by Mentor Embedded. + This file is part of the GNU Offloading and Multi Processing Library (libgomp). @@ -22,24 +24,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ -#ifndef LIBGOMP_TARGET_H -#define LIBGOMP_TARGET_H 1 +#ifndef OACC_PLUGIN_H +#define OACC_PLUGIN_H 1 -/* Type of offload target device. */ -enum offload_target_type -{ - OFFLOAD_TARGET_TYPE_HOST, - OFFLOAD_TARGET_TYPE_INTEL_MIC -}; +extern void GOMP_PLUGIN_async_unmap_vars (void *); +extern void *GOMP_PLUGIN_acc_thread (void); -/* Auxiliary struct, used for transferring a host-target address range mapping - from plugin to libgomp. 
*/ -struct mapping_table -{ - uintptr_t host_start; - uintptr_t host_end; - uintptr_t tgt_start; - uintptr_t tgt_end; -}; - -#endif /* LIBGOMP_TARGET_H */ +#endif diff --git a/libgomp/oacc-ptx.h b/libgomp/oacc-ptx.h new file mode 100644 index 00000000000..13ff86fb4af --- /dev/null +++ b/libgomp/oacc-ptx.h @@ -0,0 +1,202 @@ +/* Copyright (C) 2014-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +#define ABORT_PTX \ + ".version 3.1\n" \ + ".target sm_30\n" \ + ".address_size 64\n" \ + ".visible .func abort;\n" \ + ".visible .func abort\n" \ + "{\n" \ + "trap;\n" \ + "ret;\n" \ + "}\n" \ + ".visible .func _gfortran_abort;\n" \ + ".visible .func _gfortran_abort\n" \ + "{\n" \ + "trap;\n" \ + "ret;\n" \ + "}\n" \ + +/* Generated with: + + $ echo 'int acc_on_device(int d) { return __builtin_acc_on_device(d); } int acc_on_device_h_(int *d) { return acc_on_device(*d); }' | accel-gcc/xgcc -Baccel-gcc -x c - -o - -S -m64 -O3 -fno-builtin-acc_on_device -fno-inline +*/ +#define ACC_ON_DEVICE_PTX \ + " .version 3.1\n" \ + " .target sm_30\n" \ + " .address_size 64\n" \ + ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1);\n" \ + ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1)\n" \ + "{\n" \ + " .reg.u32 %ar1;\n" \ + ".reg.u32 %retval;\n" \ + " .reg.u64 %hr10;\n" \ + " .reg.u32 %r24;\n" \ + " .reg.u32 %r25;\n" \ + " .reg.pred %r27;\n" \ + " .reg.u32 %r30;\n" \ + " ld.param.u32 %ar1, [%in_ar1];\n" \ + " mov.u32 %r24, %ar1;\n" \ + " setp.ne.u32 %r27,%r24,4;\n" \ + " set.u32.eq.u32 %r30,%r24,5;\n" \ + " neg.s32 %r25, %r30;\n" \ + " @%r27 bra $L3;\n" \ + " mov.u32 %r25, 1;\n" \ + "$L3:\n" \ + " mov.u32 %retval, %r25;\n" \ + " st.param.u32 [%out_retval], %retval;\n" \ + " ret;\n" \ + " }\n" \ + ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1);\n" \ + ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1)\n" \ + "{\n" \ + " .reg.u64 %ar1;\n" \ + ".reg.u32 %retval;\n" \ + " .reg.u64 %hr10;\n" \ + " .reg.u64 %r25;\n" \ + " .reg.u32 %r26;\n" \ + " .reg.u32 %r27;\n" \ + " ld.param.u64 %ar1, [%in_ar1];\n" \ + " mov.u64 %r25, %ar1;\n" \ + " ld.u32 %r26, [%r25];\n" \ + " {\n" \ + " .param.u32 %retval_in;\n" \ + " {\n" \ + " .param.u32 %out_arg0;\n" \ + " st.param.u32 [%out_arg0], %r26;\n" \ + " call (%retval_in), acc_on_device, (%out_arg0);\n" \ + " }\n" \ + " ld.param.u32 %r27, 
[%retval_in];\n" \ + "}\n" \ + " mov.u32 %retval, %r27;\n" \ + " st.param.u32 [%out_retval], %retval;\n" \ + " ret;\n" \ + " }" + + #define GOACC_INTERNAL_PTX \ + ".version 3.1\n" \ + ".target sm_30\n" \ + ".address_size 64\n" \ + ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" \ + ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" \ + ".extern .func abort;\n" \ + ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n" \ + "{\n" \ + ".reg .u32 %retval;\n" \ + ".reg .u64 %hr10;\n" \ + ".reg .u32 %r22;\n" \ + ".reg .u32 %r23;\n" \ + ".reg .u32 %r24;\n" \ + ".reg .u32 %r25;\n" \ + ".reg .u32 %r26;\n" \ + ".reg .u32 %r27;\n" \ + ".reg .u32 %r28;\n" \ + ".reg .u32 %r29;\n" \ + "mov.u32 %r26,0;\n" \ + "{\n" \ + ".param .u32 %retval_in;\n" \ + "{\n" \ + ".param .u32 %out_arg0;\n" \ + "st.param.u32 [%out_arg0],%r26;\n" \ + "call (%retval_in),GOACC_ntid,(%out_arg0);\n" \ + "}\n" \ + "ld.param.u32 %r27,[%retval_in];\n" \ + "}\n" \ + "mov.u32 %r22,%r27;\n" \ + "mov.u32 %r28,0;\n" \ + "{\n" \ + ".param .u32 %retval_in;\n" \ + "{\n" \ + ".param .u32 %out_arg0;\n" \ + "st.param.u32 [%out_arg0],%r28;\n" \ + "call (%retval_in),GOACC_nctaid,(%out_arg0);\n" \ + "}\n" \ + "ld.param.u32 %r29,[%retval_in];\n" \ + "}\n" \ + "mov.u32 %r23,%r29;\n" \ + "mul.lo.u32 %r24,%r22,%r23;\n" \ + "mov.u32 %r25,%r24;\n" \ + "mov.u32 %retval,%r25;\n" \ + "st.param.u32 [%out_retval],%retval;\n" \ + "ret;\n" \ + "}\n" \ + ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n" \ + "{\n" \ + ".reg .u32 %retval;\n" \ + ".reg .u64 %hr10;\n" \ + ".reg .u32 %r22;\n" \ + ".reg .u32 %r23;\n" \ + ".reg .u32 %r24;\n" \ + ".reg .u32 %r25;\n" \ + ".reg .u32 %r26;\n" \ + ".reg .u32 %r27;\n" \ + ".reg .u32 %r28;\n" \ + ".reg .u32 %r29;\n" \ + ".reg .u32 %r30;\n" \ + ".reg .u32 %r31;\n" \ + ".reg .u32 %r32;\n" \ + ".reg .u32 %r33;\n" \ + "mov.u32 %r28,0;\n" \ + "{\n" \ + ".param .u32 %retval_in;\n" \ + "{\n" \ + ".param .u32 %out_arg0;\n" \ + "st.param.u32 
[%out_arg0],%r28;\n" \ + "call (%retval_in),GOACC_ntid,(%out_arg0);\n" \ + "}\n" \ + "ld.param.u32 %r29,[%retval_in];\n" \ + "}\n" \ + "mov.u32 %r22,%r29;\n" \ + "mov.u32 %r30,0;\n" \ + "{\n" \ + ".param .u32 %retval_in;\n" \ + "{\n" \ + ".param .u32 %out_arg0;\n" \ + "st.param.u32 [%out_arg0],%r30;\n" \ + "call (%retval_in),GOACC_ctaid,(%out_arg0);\n" \ + "}\n" \ + "ld.param.u32 %r31,[%retval_in];\n" \ + "}\n" \ + "mov.u32 %r23,%r31;\n" \ + "mul.lo.u32 %r24,%r22,%r23;\n" \ + "mov.u32 %r32,0;\n" \ + "{\n" \ + ".param .u32 %retval_in;\n" \ + "{\n" \ + ".param .u32 %out_arg0;\n" \ + "st.param.u32 [%out_arg0],%r32;\n" \ + "call (%retval_in),GOACC_tid,(%out_arg0);\n" \ + "}\n" \ + "ld.param.u32 %r33,[%retval_in];\n" \ + "}\n" \ + "mov.u32 %r25,%r33;\n" \ + "add.u32 %r26,%r24,%r25;\n" \ + "mov.u32 %r27,%r26;\n" \ + "mov.u32 %retval,%r27;\n" \ + "st.param.u32 [%out_retval],%retval;\n" \ + "ret;\n" \ + "}\n" diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90 new file mode 100644 index 00000000000..04d80886a98 --- /dev/null +++ b/libgomp/openacc.f90 @@ -0,0 +1,956 @@ +! OpenACC Runtime Library Definitions. + +! Copyright (C) 2014-2015 Free Software Foundation, Inc. + +! Contributed by Tobias Burnus +! and Mentor Embedded. + +! This file is part of the GNU Offloading and Multi Processing Library +! (libgomp). + +! Libgomp is free software; you can redistribute it and/or modify it +! under the terms of the GNU General Public License as published by +! the Free Software Foundation; either version 3, or (at your option) +! any later version. + +! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY +! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +! FOR A PARTICULAR PURPOSE. See the GNU General Public License for +! more details. + +! Under Section 7 of GPL version 3, you are granted additional +! permissions described in the GCC Runtime Library Exception, version +! 3.1, as published by the Free Software Foundation. + +! 
You should have received a copy of the GNU General Public License and +! a copy of the GCC Runtime Library Exception along with this program; +! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +! . + +module openacc_kinds + use iso_fortran_env, only: int32 + implicit none + + private :: int32 + public :: acc_device_kind + + integer, parameter :: acc_device_kind = int32 + + public :: acc_device_none, acc_device_default, acc_device_host + public :: acc_device_not_host, acc_device_nvidia + + ! Keep in sync with include/gomp-constants.h. + integer (acc_device_kind), parameter :: acc_device_none = 0 + integer (acc_device_kind), parameter :: acc_device_default = 1 + integer (acc_device_kind), parameter :: acc_device_host = 2 + integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 + integer (acc_device_kind), parameter :: acc_device_not_host = 4 + integer (acc_device_kind), parameter :: acc_device_nvidia = 5 + + public :: acc_handle_kind + + integer, parameter :: acc_handle_kind = int32 + + public :: acc_async_noval, acc_async_sync + + ! Keep in sync with include/gomp-constants.h. 
+ integer (acc_handle_kind), parameter :: acc_async_noval = -1 + integer (acc_handle_kind), parameter :: acc_async_sync = -2 + +end module + +module openacc_internal + use openacc_kinds + implicit none + + interface + function acc_get_num_devices_h (d) + import + integer acc_get_num_devices_h + integer (acc_device_kind) d + end function + + subroutine acc_set_device_type_h (d) + import + integer (acc_device_kind) d + end subroutine + + function acc_get_device_type_h () + import + integer (acc_device_kind) acc_get_device_type_h + end function + + subroutine acc_set_device_num_h (n, d) + import + integer n + integer (acc_device_kind) d + end subroutine + + function acc_get_device_num_h (d) + import + integer acc_get_device_num_h + integer (acc_device_kind) d + end function + + function acc_async_test_h (a) + logical acc_async_test_h + integer a + end function + + function acc_async_test_all_h () + logical acc_async_test_all_h + end function + + subroutine acc_wait_h (a) + integer a + end subroutine + + subroutine acc_wait_async_h (a1, a2) + integer a1, a2 + end subroutine + + subroutine acc_wait_all_h () + end subroutine + + subroutine acc_wait_all_async_h (a) + integer a + end subroutine + + subroutine acc_init_h (d) + import + integer (acc_device_kind) d + end subroutine + + subroutine acc_shutdown_h (d) + import + integer (acc_device_kind) d + end subroutine + + function acc_on_device_h (d) + import + integer (acc_device_kind) d + logical acc_on_device_h + end function + + subroutine acc_copyin_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_copyin_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_copyin_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + + subroutine 
acc_present_or_copyin_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_present_or_copyin_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_present_or_copyin_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + + subroutine acc_create_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_create_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_create_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + + subroutine acc_present_or_create_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_present_or_create_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_present_or_create_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + + subroutine acc_copyout_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_copyout_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_copyout_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + + subroutine acc_delete_32_h (a, len) + use 
iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_delete_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_delete_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + + subroutine acc_update_device_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_update_device_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_update_device_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + + subroutine acc_update_self_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_update_self_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_update_self_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + + function acc_is_present_32_h (a, len) + use iso_c_binding, only: c_int32_t + logical acc_is_present_32_h + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end function + + function acc_is_present_64_h (a, len) + use iso_c_binding, only: c_int64_t + logical acc_is_present_64_h + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end function + + function acc_is_present_array_h (a) + logical acc_is_present_array_h + type (*), dimension (..), contiguous :: a + end function + end interface 
+ + interface + function acc_get_num_devices_l (d) & + bind (C, name = "acc_get_num_devices") + use iso_c_binding, only: c_int + integer (c_int) :: acc_get_num_devices_l + integer (c_int), value :: d + end function + + subroutine acc_set_device_type_l (d) & + bind (C, name = "acc_set_device_type") + use iso_c_binding, only: c_int + integer (c_int), value :: d + end subroutine + + function acc_get_device_type_l () & + bind (C, name = "acc_get_device_type") + use iso_c_binding, only: c_int + integer (c_int) :: acc_get_device_type_l + end function + + subroutine acc_set_device_num_l (n, d) & + bind (C, name = "acc_set_device_num") + use iso_c_binding, only: c_int + integer (c_int), value :: n, d + end subroutine + + function acc_get_device_num_l (d) & + bind (C, name = "acc_get_device_num") + use iso_c_binding, only: c_int + integer (c_int) :: acc_get_device_num_l + integer (c_int), value :: d + end function + + function acc_async_test_l (a) & + bind (C, name = "acc_async_test") + use iso_c_binding, only: c_int + integer (c_int) :: acc_async_test_l + integer (c_int), value :: a + end function + + function acc_async_test_all_l () & + bind (C, name = "acc_async_test_all") + use iso_c_binding, only: c_int + integer (c_int) :: acc_async_test_all_l + end function + + subroutine acc_wait_l (a) & + bind (C, name = "acc_wait") + use iso_c_binding, only: c_int + integer (c_int), value :: a + end subroutine + + subroutine acc_wait_async_l (a1, a2) & + bind (C, name = "acc_wait_async") + use iso_c_binding, only: c_int + integer (c_int), value :: a1, a2 + end subroutine + + subroutine acc_wait_all_l () & + bind (C, name = "acc_wait_all") + use iso_c_binding, only: c_int + end subroutine + + subroutine acc_wait_all_async_l (a) & + bind (C, name = "acc_wait_all_async") + use iso_c_binding, only: c_int + integer (c_int), value :: a + end subroutine + + subroutine acc_init_l (d) & + bind (C, name = "acc_init") + use iso_c_binding, only: c_int + integer (c_int), value :: d + end 
subroutine + + subroutine acc_shutdown_l (d) & + bind (C, name = "acc_shutdown") + use iso_c_binding, only: c_int + integer (c_int), value :: d + end subroutine + + function acc_on_device_l (d) & + bind (C, name = "acc_on_device") + use iso_c_binding, only: c_int + integer (c_int) :: acc_on_device_l + integer (c_int), value :: d + end function + + subroutine acc_copyin_l (a, len) & + bind (C, name = "acc_copyin") + use iso_c_binding, only: c_size_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + end subroutine + + subroutine acc_present_or_copyin_l (a, len) & + bind (C, name = "acc_present_or_copyin") + use iso_c_binding, only: c_size_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + end subroutine + + subroutine acc_create_l (a, len) & + bind (C, name = "acc_create") + use iso_c_binding, only: c_size_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + end subroutine + + subroutine acc_present_or_create_l (a, len) & + bind (C, name = "acc_present_or_create") + use iso_c_binding, only: c_size_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + end subroutine + + subroutine acc_copyout_l (a, len) & + bind (C, name = "acc_copyout") + use iso_c_binding, only: c_size_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + end subroutine + + subroutine acc_delete_l (a, len) & + bind (C, name = "acc_delete") + use iso_c_binding, only: c_size_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + end subroutine + + subroutine acc_update_device_l (a, len) & + bind (C, name = "acc_update_device") + use iso_c_binding, only: c_size_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + end subroutine + + 
subroutine acc_update_self_l (a, len) & + bind (C, name = "acc_update_self") + use iso_c_binding, only: c_size_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + end subroutine + + function acc_is_present_l (a, len) & + bind (C, name = "acc_is_present") + use iso_c_binding, only: c_int, c_size_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + integer (c_int) :: acc_is_present_l ! C prototype returns "int" + type (*), dimension (*) :: a + integer (c_size_t), value :: len + end function + end interface +end module + +module openacc + use openacc_kinds + use openacc_internal + implicit none + + public :: openacc_version + + public :: acc_get_num_devices, acc_set_device_type, acc_get_device_type + public :: acc_set_device_num, acc_get_device_num, acc_async_test + public :: acc_async_test_all, acc_wait, acc_wait_async, acc_wait_all + public :: acc_wait_all_async, acc_init, acc_shutdown, acc_on_device + public :: acc_copyin, acc_present_or_copyin, acc_pcopyin, acc_create + public :: acc_present_or_create, acc_pcreate, acc_copyout, acc_delete + public :: acc_update_device, acc_update_self, acc_is_present + + integer, parameter :: openacc_version = 201306 + + interface acc_get_num_devices + procedure :: acc_get_num_devices_h + end interface + + interface acc_set_device_type + procedure :: acc_set_device_type_h + end interface + + interface acc_get_device_type + procedure :: acc_get_device_type_h + end interface + + interface acc_set_device_num + procedure :: acc_set_device_num_h + end interface + + interface acc_get_device_num + procedure :: acc_get_device_num_h + end interface + + interface acc_async_test + procedure :: acc_async_test_h + end interface + + interface acc_async_test_all + procedure :: acc_async_test_all_h + end interface + + interface acc_wait + procedure :: acc_wait_h + end interface + + interface acc_wait_async + procedure :: acc_wait_async_h + end interface + + interface acc_wait_all + procedure :: acc_wait_all_h + end 
interface + + interface acc_wait_all_async + procedure :: acc_wait_all_async_h + end interface + + interface acc_init + procedure :: acc_init_h + end interface + + interface acc_shutdown + procedure :: acc_shutdown_h + end interface + + interface acc_on_device + procedure :: acc_on_device_h + end interface + + ! acc_malloc: Only available in C/C++ + ! acc_free: Only available in C/C++ + + ! As vendor extension, the following code supports both 32bit and 64bit + ! arguments for "size"; the OpenACC standard only permits default-kind + ! integers, which are of kind 4 (i.e. 32 bits). + ! Additionally, the two-argument version also takes arrays as argument, + ! and the one-argument version also scalars. Note that the code assumes + ! that the arrays are contiguous. + + interface acc_copyin + procedure :: acc_copyin_32_h + procedure :: acc_copyin_64_h + procedure :: acc_copyin_array_h + end interface + + interface acc_present_or_copyin + procedure :: acc_present_or_copyin_32_h + procedure :: acc_present_or_copyin_64_h + procedure :: acc_present_or_copyin_array_h + end interface + + interface acc_pcopyin + procedure :: acc_present_or_copyin_32_h + procedure :: acc_present_or_copyin_64_h + procedure :: acc_present_or_copyin_array_h + end interface + + interface acc_create + procedure :: acc_create_32_h + procedure :: acc_create_64_h + procedure :: acc_create_array_h + end interface + + interface acc_present_or_create + procedure :: acc_present_or_create_32_h + procedure :: acc_present_or_create_64_h + procedure :: acc_present_or_create_array_h + end interface + + interface acc_pcreate + procedure :: acc_present_or_create_32_h + procedure :: acc_present_or_create_64_h + procedure :: acc_present_or_create_array_h + end interface + + interface acc_copyout + procedure :: acc_copyout_32_h + procedure :: acc_copyout_64_h + procedure :: acc_copyout_array_h + end interface + + interface acc_delete + procedure :: acc_delete_32_h + procedure :: acc_delete_64_h + procedure :: 
acc_delete_array_h + end interface + + interface acc_update_device + procedure :: acc_update_device_32_h + procedure :: acc_update_device_64_h + procedure :: acc_update_device_array_h + end interface + + interface acc_update_self + procedure :: acc_update_self_32_h + procedure :: acc_update_self_64_h + procedure :: acc_update_self_array_h + end interface + + ! acc_map_data: Only available in C/C++ + ! acc_unmap_data: Only available in C/C++ + ! acc_deviceptr: Only available in C/C++ + ! acc_hostptr: Only available in C/C++ + + interface acc_is_present + procedure :: acc_is_present_32_h + procedure :: acc_is_present_64_h + procedure :: acc_is_present_array_h + end interface + + ! acc_memcpy_to_device: Only available in C/C++ + ! acc_memcpy_from_device: Only available in C/C++ + +end module + +function acc_get_num_devices_h (d) + use openacc_internal, only: acc_get_num_devices_l + use openacc_kinds + integer acc_get_num_devices_h + integer (acc_device_kind) d + acc_get_num_devices_h = acc_get_num_devices_l (d) +end function + +subroutine acc_set_device_type_h (d) + use openacc_internal, only: acc_set_device_type_l + use openacc_kinds + integer (acc_device_kind) d + call acc_set_device_type_l (d) +end subroutine + +function acc_get_device_type_h () + use openacc_internal, only: acc_get_device_type_l + use openacc_kinds + integer (acc_device_kind) acc_get_device_type_h + acc_get_device_type_h = acc_get_device_type_l () +end function + +subroutine acc_set_device_num_h (n, d) + use openacc_internal, only: acc_set_device_num_l + use openacc_kinds + integer n + integer (acc_device_kind) d + call acc_set_device_num_l (n, d) +end subroutine + +function acc_get_device_num_h (d) + use openacc_internal, only: acc_get_device_num_l + use openacc_kinds + integer acc_get_device_num_h + integer (acc_device_kind) d + acc_get_device_num_h = acc_get_device_num_l (d) +end function + +function acc_async_test_h (a) + use openacc_internal, only: acc_async_test_l + logical acc_async_test_h 
+ integer a + if (acc_async_test_l (a) .eq. 1) then + acc_async_test_h = .TRUE. + else + acc_async_test_h = .FALSE. + end if +end function + +function acc_async_test_all_h () + use openacc_internal, only: acc_async_test_all_l + logical acc_async_test_all_h + if (acc_async_test_all_l () .eq. 1) then + acc_async_test_all_h = .TRUE. + else + acc_async_test_all_h = .FALSE. + end if +end function + +subroutine acc_wait_h (a) + use openacc_internal, only: acc_wait_l + integer a + call acc_wait_l (a) +end subroutine + +subroutine acc_wait_async_h (a1, a2) + use openacc_internal, only: acc_wait_async_l + integer a1, a2 + call acc_wait_async_l (a1, a2) +end subroutine + +subroutine acc_wait_all_h () + use openacc_internal, only: acc_wait_all_l + call acc_wait_all_l () +end subroutine + +subroutine acc_wait_all_async_h (a) + use openacc_internal, only: acc_wait_all_async_l + integer a + call acc_wait_all_async_l (a) +end subroutine + +subroutine acc_init_h (d) + use openacc_internal, only: acc_init_l + use openacc_kinds + integer (acc_device_kind) d + call acc_init_l (d) +end subroutine + +subroutine acc_shutdown_h (d) + use openacc_internal, only: acc_shutdown_l + use openacc_kinds + integer (acc_device_kind) d + call acc_shutdown_l (d) +end subroutine + +function acc_on_device_h (d) + use openacc_internal, only: acc_on_device_l + use openacc_kinds + integer (acc_device_kind) d + logical acc_on_device_h + if (acc_on_device_l (d) .eq. 1) then + acc_on_device_h = .TRUE. + else + acc_on_device_h = .FALSE. 
+ end if +end function + +subroutine acc_copyin_32_h (a, len) + use iso_c_binding, only: c_int32_t, c_size_t + use openacc_internal, only: acc_copyin_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + call acc_copyin_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_copyin_64_h (a, len) + use iso_c_binding, only: c_int64_t, c_size_t + use openacc_internal, only: acc_copyin_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + call acc_copyin_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_copyin_array_h (a) + use openacc_internal, only: acc_copyin_l + type (*), dimension (..), contiguous :: a + call acc_copyin_l (a, sizeof (a)) +end subroutine + +subroutine acc_present_or_copyin_32_h (a, len) + use iso_c_binding, only: c_int32_t, c_size_t + use openacc_internal, only: acc_present_or_copyin_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + call acc_present_or_copyin_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_present_or_copyin_64_h (a, len) + use iso_c_binding, only: c_int64_t, c_size_t + use openacc_internal, only: acc_present_or_copyin_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + call acc_present_or_copyin_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_present_or_copyin_array_h (a) + use openacc_internal, only: acc_present_or_copyin_l + type (*), dimension (..), contiguous :: a + call acc_present_or_copyin_l (a, sizeof (a)) +end subroutine + +subroutine acc_create_32_h (a, len) + use iso_c_binding, only: c_int32_t, c_size_t + use openacc_internal, only: acc_create_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + call acc_create_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_create_64_h (a, len) + use iso_c_binding, only: 
c_int64_t, c_size_t + use openacc_internal, only: acc_create_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + call acc_create_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_create_array_h (a) + use openacc_internal, only: acc_create_l + type (*), dimension (..), contiguous :: a + call acc_create_l (a, sizeof (a)) +end subroutine + +subroutine acc_present_or_create_32_h (a, len) + use iso_c_binding, only: c_int32_t, c_size_t + use openacc_internal, only: acc_present_or_create_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + call acc_present_or_create_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_present_or_create_64_h (a, len) + use iso_c_binding, only: c_int64_t, c_size_t + use openacc_internal, only: acc_present_or_create_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + call acc_present_or_create_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_present_or_create_array_h (a) + use openacc_internal, only: acc_present_or_create_l + type (*), dimension (..), contiguous :: a + call acc_present_or_create_l (a, sizeof (a)) +end subroutine + +subroutine acc_copyout_32_h (a, len) + use iso_c_binding, only: c_int32_t, c_size_t + use openacc_internal, only: acc_copyout_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + call acc_copyout_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_copyout_64_h (a, len) + use iso_c_binding, only: c_int64_t, c_size_t + use openacc_internal, only: acc_copyout_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + call acc_copyout_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_copyout_array_h (a) + use openacc_internal, only: acc_copyout_l + type (*), dimension (..), contiguous :: a + call acc_copyout_l (a, 
sizeof (a)) +end subroutine + +subroutine acc_delete_32_h (a, len) + use iso_c_binding, only: c_int32_t, c_size_t + use openacc_internal, only: acc_delete_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + call acc_delete_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_delete_64_h (a, len) + use iso_c_binding, only: c_int64_t, c_size_t + use openacc_internal, only: acc_delete_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + call acc_delete_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_delete_array_h (a) + use openacc_internal, only: acc_delete_l + type (*), dimension (..), contiguous :: a + call acc_delete_l (a, sizeof (a)) +end subroutine + +subroutine acc_update_device_32_h (a, len) + use iso_c_binding, only: c_int32_t, c_size_t + use openacc_internal, only: acc_update_device_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + call acc_update_device_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_update_device_64_h (a, len) + use iso_c_binding, only: c_int64_t, c_size_t + use openacc_internal, only: acc_update_device_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + call acc_update_device_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_update_device_array_h (a) + use openacc_internal, only: acc_update_device_l + type (*), dimension (..), contiguous :: a + call acc_update_device_l (a, sizeof (a)) +end subroutine + +subroutine acc_update_self_32_h (a, len) + use iso_c_binding, only: c_int32_t, c_size_t + use openacc_internal, only: acc_update_self_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + call acc_update_self_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_update_self_64_h (a, len) + use iso_c_binding, only: c_int64_t, 
c_size_t + use openacc_internal, only: acc_update_self_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + call acc_update_self_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_update_self_array_h (a) + use openacc_internal, only: acc_update_self_l + type (*), dimension (..), contiguous :: a + call acc_update_self_l (a, sizeof (a)) +end subroutine + +function acc_is_present_32_h (a, len) + use iso_c_binding, only: c_int32_t, c_size_t + use openacc_internal, only: acc_is_present_l + logical acc_is_present_32_h + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then + acc_is_present_32_h = .TRUE. + else + acc_is_present_32_h = .FALSE. + end if +end function + +function acc_is_present_64_h (a, len) + use iso_c_binding, only: c_int64_t, c_size_t + use openacc_internal, only: acc_is_present_l + logical acc_is_present_64_h + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then + acc_is_present_64_h = .TRUE. + else + acc_is_present_64_h = .FALSE. + end if +end function + +function acc_is_present_array_h (a) + use openacc_internal, only: acc_is_present_l + logical acc_is_present_array_h + type (*), dimension (..), contiguous :: a + acc_is_present_array_h = acc_is_present_l (a, sizeof (a)) == 1 +end function diff --git a/libgomp/openacc.h b/libgomp/openacc.h new file mode 100644 index 00000000000..334324114f1 --- /dev/null +++ b/libgomp/openacc.h @@ -0,0 +1,118 @@ +/* OpenACC Runtime Library User-facing Declarations + + Copyright (C) 2013-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). 
+ + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _OPENACC_H +#define _OPENACC_H 1 + +/* The OpenACC standard is silent on whether or not including <openacc.h> + might or must not include other header files. We chose to include + some. */ +#include <stddef.h> /* size_t */ + +#ifdef __cplusplus +extern "C" { +#endif + +#if __cplusplus >= 201103 /* C++11 or later: plain noexcept-specifier. */ +# define __GOACC_NOTHROW noexcept +#elif __cplusplus +# define __GOACC_NOTHROW throw () +#else /* Not C++ */ +# define __GOACC_NOTHROW __attribute__ ((__nothrow__)) +#endif + +/* Types */ +typedef enum acc_device_t + { + /* Keep in sync with include/gomp-constants.h. */ + acc_device_none = 0, + acc_device_default = 1, + acc_device_host = 2, + acc_device_host_nonshm = 3, + acc_device_not_host = 4, + acc_device_nvidia = 5, + _ACC_device_hwm + } acc_device_t; + +typedef enum acc_async_t + { + /* Keep in sync with include/gomp-constants.h. 
*/ + acc_async_noval = -1, + acc_async_sync = -2 + } acc_async_t; + +int acc_get_num_devices (acc_device_t) __GOACC_NOTHROW; +void acc_set_device_type (acc_device_t) __GOACC_NOTHROW; +acc_device_t acc_get_device_type (void) __GOACC_NOTHROW; +void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW; +int acc_get_device_num (acc_device_t) __GOACC_NOTHROW; +int acc_async_test (int) __GOACC_NOTHROW; +int acc_async_test_all (void) __GOACC_NOTHROW; +void acc_wait (int) __GOACC_NOTHROW; +void acc_wait_async (int, int) __GOACC_NOTHROW; +void acc_wait_all (void) __GOACC_NOTHROW; +void acc_wait_all_async (int) __GOACC_NOTHROW; +void acc_init (acc_device_t) __GOACC_NOTHROW; +void acc_shutdown (acc_device_t) __GOACC_NOTHROW; +int acc_on_device (acc_device_t) __GOACC_NOTHROW; +void *acc_malloc (size_t) __GOACC_NOTHROW; +void acc_free (void *) __GOACC_NOTHROW; +/* Some of these would be more correct with const qualifiers, but + the standard specifies otherwise. */ +void *acc_copyin (void *, size_t) __GOACC_NOTHROW; +void *acc_present_or_copyin (void *, size_t) __GOACC_NOTHROW; +void *acc_create (void *, size_t) __GOACC_NOTHROW; +void *acc_present_or_create (void *, size_t) __GOACC_NOTHROW; +void acc_copyout (void *, size_t) __GOACC_NOTHROW; +void acc_delete (void *, size_t) __GOACC_NOTHROW; +void acc_update_device (void *, size_t) __GOACC_NOTHROW; +void acc_update_self (void *, size_t) __GOACC_NOTHROW; +void acc_map_data (void *, void *, size_t) __GOACC_NOTHROW; +void acc_unmap_data (void *) __GOACC_NOTHROW; +void *acc_deviceptr (void *) __GOACC_NOTHROW; +void *acc_hostptr (void *) __GOACC_NOTHROW; +int acc_is_present (void *, size_t) __GOACC_NOTHROW; +void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW; +void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW; + +/* Old names. OpenACC does not specify whether these can or must + not be macros, inlines or aliases for the new names. 
*/ +#define acc_pcreate acc_present_or_create +#define acc_pcopyin acc_present_or_copyin + +/* CUDA-specific routines. */ +void *acc_get_current_cuda_device (void) __GOACC_NOTHROW; +void *acc_get_current_cuda_context (void) __GOACC_NOTHROW; +void *acc_get_cuda_stream (int) __GOACC_NOTHROW; +int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW; + +#ifdef __cplusplus +} +#endif + +#endif /* _OPENACC_H */ diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h new file mode 100644 index 00000000000..28659a1e0b0 --- /dev/null +++ b/libgomp/openacc_lib.h @@ -0,0 +1,381 @@ +! OpenACC Runtime Library Definitions. -*- mode: fortran -*- + +! Copyright (C) 2014-2015 Free Software Foundation, Inc. + +! Contributed by Tobias Burnus +! and Mentor Embedded. + +! This file is part of the GNU Offloading and Multi Processing Library +! (libgomp). + +! Libgomp is free software; you can redistribute it and/or modify it +! under the terms of the GNU General Public License as published by +! the Free Software Foundation; either version 3, or (at your option) +! any later version. + +! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY +! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +! FOR A PARTICULAR PURPOSE. See the GNU General Public License for +! more details. + +! Under Section 7 of GPL version 3, you are granted additional +! permissions described in the GCC Runtime Library Exception, version +! 3.1, as published by the Free Software Foundation. + +! You should have received a copy of the GNU General Public License and +! a copy of the GCC Runtime Library Exception along with this program; +! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +! <http://www.gnu.org/licenses/>. + +! NOTE: Due to the use of dimension (..), the code only works when compiled +! with -std=f2008ts/gnu/legacy but not with other standard settings. +! Alternatively, the user can use the module version, which permits +! compilation with -std=f95. 
+ + integer, parameter :: acc_device_kind = 4 + +! Keep in sync with include/gomp-constants.h. + integer (acc_device_kind), parameter :: acc_device_none = 0 + integer (acc_device_kind), parameter :: acc_device_default = 1 + integer (acc_device_kind), parameter :: acc_device_host = 2 + integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 + integer (acc_device_kind), parameter :: acc_device_not_host = 4 + integer (acc_device_kind), parameter :: acc_device_nvidia = 5 + + integer, parameter :: acc_handle_kind = 4 + +! Keep in sync with include/gomp-constants.h. + integer (acc_handle_kind), parameter :: acc_async_noval = -1 + integer (acc_handle_kind), parameter :: acc_async_sync = -2 + + integer, parameter :: openacc_version = 201306 + + interface acc_get_num_devices + function acc_get_num_devices_h (d) + import acc_device_kind + integer acc_get_num_devices_h + integer (acc_device_kind) d + end function + end interface + + interface acc_set_device_type + subroutine acc_set_device_type_h (d) + import acc_device_kind + integer (acc_device_kind) d + end subroutine + end interface + + interface acc_get_device_type + function acc_get_device_type_h () + import acc_device_kind + integer (acc_device_kind) acc_get_device_type_h + end function + end interface + + interface acc_set_device_num + subroutine acc_set_device_num_h (n, d) + import acc_device_kind + integer n + integer (acc_device_kind) d + end subroutine + end interface + + interface acc_get_device_num + function acc_get_device_num_h (d) + import acc_device_kind + integer acc_get_device_num_h + integer (acc_device_kind) d + end function + end interface + + interface acc_async_test + function acc_async_test_h (a) + logical acc_async_test_h + integer a + end function + end interface + + interface acc_async_test_all + function acc_async_test_all_h () + logical acc_async_test_all_h + end function + end interface + + interface acc_wait + subroutine acc_wait_h (a) + integer a + end subroutine + end interface + 
+ interface acc_wait_async + subroutine acc_wait_async_h (a1, a2) + integer a1, a2 + end subroutine + end interface + + interface acc_wait_all + subroutine acc_wait_all_h () + end subroutine + end interface + + interface acc_wait_all_async + subroutine acc_wait_all_async_h (a) + integer a + end subroutine + end interface + + interface acc_init + subroutine acc_init_h (devicetype) + import acc_device_kind + integer (acc_device_kind) devicetype + end subroutine + end interface + + interface acc_shutdown + subroutine acc_shutdown_h (devicetype) + import acc_device_kind + integer (acc_device_kind) devicetype + end subroutine + end interface + + interface acc_on_device + function acc_on_device_h (devicetype) + import acc_device_kind + logical acc_on_device_h + integer (acc_device_kind) devicetype + end function + end interface + + ! acc_malloc: Only available in C/C++ + ! acc_free: Only available in C/C++ + + interface acc_copyin + subroutine acc_copyin_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_copyin_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_copyin_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + end interface + + interface acc_present_or_copyin + subroutine acc_present_or_copyin_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_present_or_copyin_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_present_or_copyin_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + end interface + + interface 
acc_pcopyin + subroutine acc_pcopyin_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_pcopyin_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_pcopyin_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + end interface + + interface acc_create + subroutine acc_create_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_create_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_create_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + end interface + + interface acc_present_or_create + subroutine acc_present_or_create_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_present_or_create_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_present_or_create_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + end interface + + interface acc_pcreate + subroutine acc_pcreate_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_pcreate_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine 
acc_pcreate_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + end interface + + interface acc_copyout + subroutine acc_copyout_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_copyout_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_copyout_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + end interface + + interface acc_delete + subroutine acc_delete_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_delete_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_delete_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + end interface + + interface acc_update_device + subroutine acc_update_device_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_update_device_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_update_device_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + end interface + + interface acc_update_self + subroutine acc_update_self_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_update_self_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES 
NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_update_self_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + end interface + + ! acc_map_data: Only available in C/C++ + ! acc_unmap_data: Only available in C/C++ + ! acc_deviceptr: Only available in C/C++ + ! acc_hostptr: Only available in C/C++ + + interface acc_is_present + function acc_is_present_32_h (a, len) + use iso_c_binding, only: c_int32_t + logical acc_is_present_32_h + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end function + + function acc_is_present_64_h (a, len) + use iso_c_binding, only: c_int64_t + logical acc_is_present_64_h + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end function + + function acc_is_present_array_h (a) + logical acc_is_present_array_h + type (*), dimension (..), contiguous :: a + end function + end interface + + ! acc_memcpy_to_device: Only available in C/C++ + ! acc_memcpy_from_device: Only available in C/C++ diff --git a/libgomp/plugin/Makefrag.am b/libgomp/plugin/Makefrag.am new file mode 100644 index 00000000000..167485f52c1 --- /dev/null +++ b/libgomp/plugin/Makefrag.am @@ -0,0 +1,49 @@ +# Plugins for offload execution, Makefile.am fragment. +# +# Copyright (C) 2014-2015 Free Software Foundation, Inc. +# +# Contributed by Mentor Embedded. +# +# This file is part of the GNU Offloading and Multi Processing Library +# (libgomp). +# +# Libgomp is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE.
See the GNU General Public License for +# more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# . + +if PLUGIN_NVPTX +# Nvidia PTX OpenACC plugin. +libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) +toolexeclib_LTLIBRARIES += libgomp-plugin-nvptx.la +libgomp_plugin_nvptx_la_SOURCES = plugin/plugin-nvptx.c +libgomp_plugin_nvptx_la_CPPFLAGS = $(AM_CPPFLAGS) $(PLUGIN_NVPTX_CPPFLAGS) +libgomp_plugin_nvptx_la_LDFLAGS = $(libgomp_plugin_nvptx_version_info) \ + $(lt_host_flags) +libgomp_plugin_nvptx_la_LDFLAGS += $(PLUGIN_NVPTX_LDFLAGS) +libgomp_plugin_nvptx_la_LIBADD = libgomp.la $(PLUGIN_NVPTX_LIBS) +libgomp_plugin_nvptx_la_LIBTOOLFLAGS = --tag=disable-static +endif + +libgomp_plugin_host_nonshm_version_info = -version-info $(libtool_VERSION) +toolexeclib_LTLIBRARIES += libgomp-plugin-host_nonshm.la +libgomp_plugin_host_nonshm_la_SOURCES = plugin/plugin-host.c +libgomp_plugin_host_nonshm_la_CPPFLAGS = $(AM_CPPFLAGS) -DHOST_NONSHM_PLUGIN +libgomp_plugin_host_nonshm_la_LDFLAGS = \ + $(libgomp_plugin_host_nonshm_version_info) $(lt_host_flags) +libgomp_plugin_host_nonshm_la_LIBADD = libgomp.la +libgomp_plugin_host_nonshm_la_LIBTOOLFLAGS = --tag=disable-static diff --git a/libgomp/plugin/configfrag.ac b/libgomp/plugin/configfrag.ac new file mode 100644 index 00000000000..254c68853cb --- /dev/null +++ b/libgomp/plugin/configfrag.ac @@ -0,0 +1,148 @@ +# Plugins for offload execution, configure.ac fragment. -*- mode: autoconf -*- +# +# Copyright (C) 2014-2015 Free Software Foundation, Inc. +# +# Contributed by Mentor Embedded. 
+# +# This file is part of the GNU Offloading and Multi Processing Library +# (libgomp). +# +# Libgomp is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# . + +offload_targets= +AC_SUBST(offload_targets) +plugin_support=yes +AC_CHECK_LIB(dl, dlsym, , [plugin_support=no]) +if test x"$plugin_support" = xyes; then + AC_DEFINE(PLUGIN_SUPPORT, 1, + [Define if all infrastructure, needed for plugins, is supported.]) + offload_targets=host_nonshm +elif test "x${enable_offload_targets-no}" != xno; then + AC_MSG_ERROR([Can't support offloading without support for plugins]) +fi + +# Look for the CUDA driver package. +CUDA_DRIVER_INCLUDE= +CUDA_DRIVER_LIB= +AC_SUBST(CUDA_DRIVER_INCLUDE) +AC_SUBST(CUDA_DRIVER_LIB) +CUDA_DRIVER_CPPFLAGS= +CUDA_DRIVER_LDFLAGS= +AC_ARG_WITH(cuda-driver, + [AS_HELP_STRING([--with-cuda-driver=PATH], + [specify prefix directory for installed CUDA driver package. 
+ Equivalent to --with-cuda-driver-include=PATH/include + plus --with-cuda-driver-lib=PATH/lib])]) +AC_ARG_WITH(cuda-driver-include, + [AS_HELP_STRING([--with-cuda-driver-include=PATH], + [specify directory for installed CUDA driver include files])]) +AC_ARG_WITH(cuda-driver-lib, + [AS_HELP_STRING([--with-cuda-driver-lib=PATH], + [specify directory for the installed CUDA driver library])]) +if test "x$with_cuda_driver" != x; then + CUDA_DRIVER_INCLUDE=$with_cuda_driver/include + CUDA_DRIVER_LIB=$with_cuda_driver/lib +fi +if test "x$with_cuda_driver_include" != x; then + CUDA_DRIVER_INCLUDE=$with_cuda_driver_include +fi +if test "x$with_cuda_driver_lib" != x; then + CUDA_DRIVER_LIB=$with_cuda_driver_lib +fi +if test "x$CUDA_DRIVER_INCLUDE" != x; then + CUDA_DRIVER_CPPFLAGS=-I$CUDA_DRIVER_INCLUDE +fi +if test "x$CUDA_DRIVER_LIB" != x; then + CUDA_DRIVER_LDFLAGS=-L$CUDA_DRIVER_LIB +fi + +PLUGIN_NVPTX=0 +PLUGIN_NVPTX_CPPFLAGS= +PLUGIN_NVPTX_LDFLAGS= +PLUGIN_NVPTX_LIBS= +AC_SUBST(PLUGIN_NVPTX) +AC_SUBST(PLUGIN_NVPTX_CPPFLAGS) +AC_SUBST(PLUGIN_NVPTX_LDFLAGS) +AC_SUBST(PLUGIN_NVPTX_LIBS) + +# Get offload targets and path to install tree of offloading compiler. 
+offload_additional_options= +offload_additional_lib_paths= +AC_SUBST(offload_additional_options) +AC_SUBST(offload_additional_lib_paths) +if test x"$enable_offload_targets" != x; then + for tgt in `echo $enable_offload_targets | sed -e 's#,# #g'`; do + tgt_dir=`echo $tgt | grep '=' | sed 's/.*=//'` + tgt=`echo $tgt | sed 's/=.*//'` + case $tgt in + *-intelmic-* | *-intelmicemul-*) + tgt_name=intelmic + ;; + nvptx*) + tgt_name=nvptx + PLUGIN_NVPTX=$tgt + PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS + PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS + PLUGIN_NVPTX_LIBS='-lcuda' + + PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS + CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS" + PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS + LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS" + PLUGIN_NVPTX_save_LIBS=$LIBS + LIBS="$PLUGIN_NVPTX_LIBS $LIBS" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [#include "cuda.h"], + [CUresult r = cuCtxPushCurrent (NULL);])], + [PLUGIN_NVPTX=1]) + CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS + LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS + LIBS=$PLUGIN_NVPTX_save_LIBS + case $PLUGIN_NVPTX in + nvptx*) + PLUGIN_NVPTX=0 + AC_MSG_ERROR([CUDA driver package required for nvptx support]) + ;; + esac + ;; + *) + AC_MSG_ERROR([unknown offload target specified]) + ;; + esac + if test x"$offload_targets" = x; then + offload_targets=$tgt_name + else + offload_targets=$offload_targets,$tgt_name + fi + if test x"$tgt_dir" != x; then + offload_additional_options="$offload_additional_options -B$tgt_dir/libexec/gcc/\$(target_alias)/\$(gcc_version) -B$tgt_dir/bin" + offload_additional_lib_paths="$offload_additional_lib_paths:$tgt_dir/lib64:$tgt_dir/lib:$tgt_dir/lib32" + else + offload_additional_options="$offload_additional_options -B\$(libexecdir)/gcc/\$(target_alias)/\$(gcc_version) -B\$(bindir)" + offload_additional_lib_paths="$offload_additional_lib_paths:$toolexeclibdir" + fi + done +fi +AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets", + [Define to hold the list of target names suitable for offloading.]) 
+AM_CONDITIONAL([PLUGIN_NVPTX], [test $PLUGIN_NVPTX = 1]) +AC_DEFINE_UNQUOTED([PLUGIN_NVPTX], [$PLUGIN_NVPTX], + [Define to 1 if the NVIDIA plugin is built, 0 if not.]) diff --git a/libgomp/plugin/plugin-host.c b/libgomp/plugin/plugin-host.c new file mode 100644 index 00000000000..ebf7f11caf7 --- /dev/null +++ b/libgomp/plugin/plugin-host.c @@ -0,0 +1,266 @@ +/* OpenACC Runtime Library: acc_device_host, acc_device_host_nonshm. + + Copyright (C) 2013-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* Simple implementation of support routines for a shared-memory + acc_device_host, and a non-shared memory acc_device_host_nonshm, with the + latter built as a plugin. 
*/ + +#include "openacc.h" +#include "config.h" +#ifdef HOST_NONSHM_PLUGIN +#include "libgomp-plugin.h" +#include "oacc-plugin.h" +#else +#include "libgomp.h" +#include "oacc-int.h" +#endif + +#include +#include +#include +#include + +#ifdef HOST_NONSHM_PLUGIN +#define STATIC +#define GOMP(X) GOMP_PLUGIN_##X +#define SELF "host_nonshm plugin: " +#else +#define STATIC static +#define GOMP(X) gomp_##X +#define SELF "host: " +#endif + +STATIC const char * +GOMP_OFFLOAD_get_name (void) +{ +#ifdef HOST_NONSHM_PLUGIN + return "host_nonshm"; +#else + return "host"; +#endif +} + +STATIC unsigned int +GOMP_OFFLOAD_get_caps (void) +{ + unsigned int caps = (GOMP_OFFLOAD_CAP_OPENACC_200 + | GOMP_OFFLOAD_CAP_NATIVE_EXEC); + +#ifndef HOST_NONSHM_PLUGIN + caps |= GOMP_OFFLOAD_CAP_SHARED_MEM; +#endif + + return caps; +} + +STATIC int +GOMP_OFFLOAD_get_type (void) +{ +#ifdef HOST_NONSHM_PLUGIN + return OFFLOAD_TARGET_TYPE_HOST_NONSHM; +#else + return OFFLOAD_TARGET_TYPE_HOST; +#endif +} + +STATIC int +GOMP_OFFLOAD_get_num_devices (void) +{ + return 1; +} + +STATIC void +GOMP_OFFLOAD_register_image (void *host_table __attribute__ ((unused)), + void *target_data __attribute__ ((unused))) +{ +} + +STATIC void +GOMP_OFFLOAD_init_device (int n __attribute__ ((unused))) +{ +} + +STATIC void +GOMP_OFFLOAD_fini_device (int n __attribute__ ((unused))) +{ +} + +STATIC int +GOMP_OFFLOAD_get_table (int n __attribute__ ((unused)), + struct mapping_table **table __attribute__ ((unused))) +{ + return 0; +} + +STATIC void * +GOMP_OFFLOAD_openacc_open_device (int n) +{ + return (void *) (intptr_t) n; +} + +STATIC int +GOMP_OFFLOAD_openacc_close_device (void *hnd) +{ + return 0; +} + +STATIC int +GOMP_OFFLOAD_openacc_get_device_num (void) +{ + return 0; +} + +STATIC void +GOMP_OFFLOAD_openacc_set_device_num (int n) +{ + if (n > 0) + GOMP (fatal) ("device number %u out of range for host execution", n); +} + +STATIC void * +GOMP_OFFLOAD_alloc (int n __attribute__ ((unused)), size_t s) +{ + return GOMP 
(malloc) (s); +} + +STATIC void +GOMP_OFFLOAD_free (int n __attribute__ ((unused)), void *p) +{ + free (p); +} + +STATIC void * +GOMP_OFFLOAD_host2dev (int n __attribute__ ((unused)), void *d, const void *h, + size_t s) +{ +#ifdef HOST_NONSHM_PLUGIN + memcpy (d, h, s); +#endif + + return 0; +} + +STATIC void * +GOMP_OFFLOAD_dev2host (int n __attribute__ ((unused)), void *h, const void *d, + size_t s) +{ +#ifdef HOST_NONSHM_PLUGIN + memcpy (h, d, s); +#endif + + return 0; +} + +STATIC void +GOMP_OFFLOAD_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars) +{ + void (*fn)(void *) = (void (*)(void *)) fn_ptr; + + fn (vars); +} + +STATIC void +GOMP_OFFLOAD_openacc_parallel (void (*fn) (void *), + size_t mapnum __attribute__ ((unused)), + void **hostaddrs __attribute__ ((unused)), + void **devaddrs __attribute__ ((unused)), + size_t *sizes __attribute__ ((unused)), + unsigned short *kinds __attribute__ ((unused)), + int num_gangs __attribute__ ((unused)), + int num_workers __attribute__ ((unused)), + int vector_length __attribute__ ((unused)), + int async __attribute__ ((unused)), + void *targ_mem_desc __attribute__ ((unused))) +{ +#ifdef HOST_NONSHM_PLUGIN + fn (devaddrs); +#else + fn (hostaddrs); +#endif +} + +STATIC void +GOMP_OFFLOAD_openacc_register_async_cleanup (void *targ_mem_desc) +{ +#ifdef HOST_NONSHM_PLUGIN + /* "Asynchronous" launches are executed synchronously on the (non-SHM) host, + so there's no point in delaying host-side cleanup -- just do it now. 
*/ + GOMP_PLUGIN_async_unmap_vars (targ_mem_desc); +#endif +} + +STATIC void +GOMP_OFFLOAD_openacc_async_set_async (int async __attribute__ ((unused))) +{ +} + +STATIC int +GOMP_OFFLOAD_openacc_async_test (int async __attribute__ ((unused))) +{ + return 1; +} + +STATIC int +GOMP_OFFLOAD_openacc_async_test_all (void) +{ + return 1; +} + +STATIC void +GOMP_OFFLOAD_openacc_async_wait (int async __attribute__ ((unused))) +{ +} + +STATIC void +GOMP_OFFLOAD_openacc_async_wait_all (void) +{ +} + +STATIC void +GOMP_OFFLOAD_openacc_async_wait_async (int async1 __attribute__ ((unused)), + int async2 __attribute__ ((unused))) +{ +} + +STATIC void +GOMP_OFFLOAD_openacc_async_wait_all_async (int async __attribute__ ((unused))) +{ +} + +STATIC void * +GOMP_OFFLOAD_openacc_create_thread_data (void *targ_data + __attribute__ ((unused))) +{ + return NULL; +} + +STATIC void +GOMP_OFFLOAD_openacc_destroy_thread_data (void *tls_data + __attribute__ ((unused))) +{ +} diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c new file mode 100644 index 00000000000..483cb7559e8 --- /dev/null +++ b/libgomp/plugin/plugin-nvptx.c @@ -0,0 +1,1791 @@ +/* Plugin for NVPTX execution. + + Copyright (C) 2013-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Nvidia PTX-specific parts of OpenACC support. The cuda driver + library appears to hold some implicit state, but the documentation + is not clear as to what that state might be. Or how one might + propagate it from one thread to another. */ + +#include "openacc.h" +#include "config.h" +#include "libgomp-plugin.h" +#include "oacc-ptx.h" +#include "oacc-plugin.h" + +#include <pthread.h> +#include <cuda.h> +#include <stdbool.h> +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <dlfcn.h> +#include <unistd.h> +#include <assert.h> + +#define ARRAYSIZE(X) (sizeof (X) / sizeof ((X)[0])) + +static struct +{ + CUresult r; + char *m; +} cuda_errlist[]= +{ + { CUDA_ERROR_INVALID_VALUE, "invalid value" }, + { CUDA_ERROR_OUT_OF_MEMORY, "out of memory" }, + { CUDA_ERROR_NOT_INITIALIZED, "not initialized" }, + { CUDA_ERROR_DEINITIALIZED, "deinitialized" }, + { CUDA_ERROR_PROFILER_DISABLED, "profiler disabled" }, + { CUDA_ERROR_PROFILER_NOT_INITIALIZED, "profiler not initialized" }, + { CUDA_ERROR_PROFILER_ALREADY_STARTED, "already started" }, + { CUDA_ERROR_PROFILER_ALREADY_STOPPED, "already stopped" }, + { CUDA_ERROR_NO_DEVICE, "no device" }, + { CUDA_ERROR_INVALID_DEVICE, "invalid device" }, + { CUDA_ERROR_INVALID_IMAGE, "invalid image" }, + { CUDA_ERROR_INVALID_CONTEXT, "invalid context" }, + { CUDA_ERROR_CONTEXT_ALREADY_CURRENT, "context already current" }, + { CUDA_ERROR_MAP_FAILED, "map error" }, + { CUDA_ERROR_UNMAP_FAILED, "unmap error" }, + { CUDA_ERROR_ARRAY_IS_MAPPED, "array is mapped" }, + { CUDA_ERROR_ALREADY_MAPPED, "already mapped" }, + { CUDA_ERROR_NO_BINARY_FOR_GPU, "no binary for gpu" }, + { CUDA_ERROR_ALREADY_ACQUIRED,
"already acquired" }, + { CUDA_ERROR_NOT_MAPPED, "not mapped" }, + { CUDA_ERROR_NOT_MAPPED_AS_ARRAY, "not mapped as array" }, + { CUDA_ERROR_NOT_MAPPED_AS_POINTER, "not mapped as pointer" }, + { CUDA_ERROR_ECC_UNCORRECTABLE, "ecc uncorrectable" }, + { CUDA_ERROR_UNSUPPORTED_LIMIT, "unsupported limit" }, + { CUDA_ERROR_CONTEXT_ALREADY_IN_USE, "context already in use" }, + { CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, "peer access unsupported" }, + { CUDA_ERROR_INVALID_SOURCE, "invalid source" }, + { CUDA_ERROR_FILE_NOT_FOUND, "file not found" }, + { CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, + "shared object symbol not found" }, + { CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, "shared object init error" }, + { CUDA_ERROR_OPERATING_SYSTEM, "operating system" }, + { CUDA_ERROR_INVALID_HANDLE, "invalid handle" }, + { CUDA_ERROR_NOT_FOUND, "not found" }, + { CUDA_ERROR_NOT_READY, "not ready" }, + { CUDA_ERROR_LAUNCH_FAILED, "launch error" }, + { CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, "launch out of resources" }, + { CUDA_ERROR_LAUNCH_TIMEOUT, "launch timeout" }, + { CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, + "launch incompatibe texturing" }, + { CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, "peer access already enabled" }, + { CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, "peer access not enabled " }, + { CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE, "primary cotext active" }, + { CUDA_ERROR_CONTEXT_IS_DESTROYED, "context is destroyed" }, + { CUDA_ERROR_ASSERT, "assert" }, + { CUDA_ERROR_TOO_MANY_PEERS, "too many peers" }, + { CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED, + "host memory already registered" }, + { CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED, "host memory not registered" }, + { CUDA_ERROR_NOT_PERMITTED, "not permitted" }, + { CUDA_ERROR_NOT_SUPPORTED, "not supported" }, + { CUDA_ERROR_UNKNOWN, "unknown" } +}; + +static char errmsg[128]; + +static char * +cuda_error (CUresult r) +{ + int i; + + for (i = 0; i < ARRAYSIZE (cuda_errlist); i++) + { + if (cuda_errlist[i].r == r) + return &cuda_errlist[i].m[0]; + 
} + + sprintf (&errmsg[0], "unknown result code: %5d", r); + + return &errmsg[0]; +} + +struct targ_fn_descriptor +{ + CUfunction fn; + const char *name; +}; + +static bool ptx_inited = false; + +struct ptx_stream +{ + CUstream stream; + pthread_t host_thread; + bool multithreaded; + + CUdeviceptr d; + void *h; + void *h_begin; + void *h_end; + void *h_next; + void *h_prev; + void *h_tail; + + struct ptx_stream *next; +}; + +/* Thread-specific data for PTX. */ + +struct nvptx_thread +{ + struct ptx_stream *current_stream; + struct ptx_device *ptx_dev; +}; + +struct map +{ + int async; + size_t size; + char mappings[0]; +}; + +static void +map_init (struct ptx_stream *s) +{ + CUresult r; + + int size = getpagesize (); + + assert (s); + assert (!s->d); + assert (!s->h); + + r = cuMemAllocHost (&s->h, size); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemAllocHost error: %s", cuda_error (r)); + + r = cuMemHostGetDevicePointer (&s->d, s->h, 0); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemHostGetDevicePointer error: %s", cuda_error (r)); + + assert (s->h); + + s->h_begin = s->h; + s->h_end = s->h_begin + size; + s->h_next = s->h_prev = s->h_tail = s->h_begin; + + assert (s->h_next); + assert (s->h_end); +} + +static void +map_fini (struct ptx_stream *s) +{ + CUresult r; + + r = cuMemFreeHost (s->h); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemFreeHost error: %s", cuda_error (r)); +} + +static void +map_pop (struct ptx_stream *s) +{ + struct map *m; + + assert (s != NULL); + assert (s->h_next); + assert (s->h_prev); + assert (s->h_tail); + + m = s->h_tail; + + s->h_tail += m->size; + + if (s->h_tail >= s->h_end) + s->h_tail = s->h_begin + (int) (s->h_tail - s->h_end); + + if (s->h_next == s->h_tail) + s->h_prev = s->h_next; + + assert (s->h_next >= s->h_begin); + assert (s->h_tail >= s->h_begin); + assert (s->h_prev >= s->h_begin); + + assert (s->h_next <= s->h_end); + assert (s->h_tail <= s->h_end); + assert (s->h_prev <= s->h_end); +} + +static 
void +map_push (struct ptx_stream *s, int async, size_t size, void **h, void **d) +{ + int left; + int offset; + struct map *m; + + assert (s != NULL); + + left = s->h_end - s->h_next; + size += sizeof (struct map); + + assert (s->h_prev); + assert (s->h_next); + + if (size >= left) + { + m = s->h_prev; + m->size += left; + s->h_next = s->h_begin; + + if (s->h_next + size > s->h_end) + GOMP_PLUGIN_fatal ("unable to push map"); + } + + assert (s->h_next); + + m = s->h_next; + m->async = async; + m->size = size; + + offset = (void *)&m->mappings[0] - s->h; + + *d = (void *)(s->d + offset); + *h = (void *)(s->h + offset); + + s->h_prev = s->h_next; + s->h_next += size; + + assert (s->h_prev); + assert (s->h_next); + + assert (s->h_next >= s->h_begin); + assert (s->h_tail >= s->h_begin); + assert (s->h_prev >= s->h_begin); + assert (s->h_next <= s->h_end); + assert (s->h_tail <= s->h_end); + assert (s->h_prev <= s->h_end); + + return; +} + +struct ptx_device +{ + CUcontext ctx; + bool ctx_shared; + CUdevice dev; + struct ptx_stream *null_stream; + /* All non-null streams associated with this device (actually context), + either created implicitly or passed in from the user (via + acc_set_cuda_stream). */ + struct ptx_stream *active_streams; + struct { + struct ptx_stream **arr; + int size; + } async_streams; + /* A lock for use when manipulating the above stream list and array. 
*/ + pthread_mutex_t stream_lock; + int ord; + bool overlap; + bool map; + bool concur; + int mode; + bool mkern; + + struct ptx_device *next; +}; + +enum ptx_event_type +{ + PTX_EVT_MEM, + PTX_EVT_KNL, + PTX_EVT_SYNC, + PTX_EVT_ASYNC_CLEANUP +}; + +struct ptx_event +{ + CUevent *evt; + int type; + void *addr; + int ord; + + struct ptx_event *next; +}; + +static pthread_mutex_t ptx_event_lock; +static struct ptx_event *ptx_events; + +#define _XSTR(s) _STR(s) +#define _STR(s) #s + +static struct _synames +{ + char *n; +} cuda_symnames[] = +{ + { _XSTR (cuCtxCreate) }, + { _XSTR (cuCtxDestroy) }, + { _XSTR (cuCtxGetCurrent) }, + { _XSTR (cuCtxPushCurrent) }, + { _XSTR (cuCtxSynchronize) }, + { _XSTR (cuDeviceGet) }, + { _XSTR (cuDeviceGetAttribute) }, + { _XSTR (cuDeviceGetCount) }, + { _XSTR (cuEventCreate) }, + { _XSTR (cuEventDestroy) }, + { _XSTR (cuEventQuery) }, + { _XSTR (cuEventRecord) }, + { _XSTR (cuInit) }, + { _XSTR (cuLaunchKernel) }, + { _XSTR (cuLinkAddData) }, + { _XSTR (cuLinkComplete) }, + { _XSTR (cuLinkCreate) }, + { _XSTR (cuMemAlloc) }, + { _XSTR (cuMemAllocHost) }, + { _XSTR (cuMemcpy) }, + { _XSTR (cuMemcpyDtoH) }, + { _XSTR (cuMemcpyDtoHAsync) }, + { _XSTR (cuMemcpyHtoD) }, + { _XSTR (cuMemcpyHtoDAsync) }, + { _XSTR (cuMemFree) }, + { _XSTR (cuMemFreeHost) }, + { _XSTR (cuMemGetAddressRange) }, + { _XSTR (cuMemHostGetDevicePointer) }, + { _XSTR (cuMemHostRegister) }, + { _XSTR (cuMemHostUnregister) }, + { _XSTR (cuModuleGetFunction) }, + { _XSTR (cuModuleLoadData) }, + { _XSTR (cuStreamDestroy) }, + { _XSTR (cuStreamQuery) }, + { _XSTR (cuStreamSynchronize) }, + { _XSTR (cuStreamWaitEvent) } +}; + +static int +verify_device_library (void) +{ + int i; + void *dh, *ds; + + dh = dlopen ("libcuda.so", RTLD_LAZY); + if (!dh) + return -1; + + for (i = 0; i < ARRAYSIZE (cuda_symnames); i++) + { + ds = dlsym (dh, cuda_symnames[i].n); + if (!ds) + return -1; + } + + dlclose (dh); + + return 0; +} + +static inline struct nvptx_thread * +nvptx_thread 
(void) +{ + return (struct nvptx_thread *) GOMP_PLUGIN_acc_thread (); +} + +static void +init_streams_for_device (struct ptx_device *ptx_dev, int concurrency) +{ + int i; + struct ptx_stream *null_stream + = GOMP_PLUGIN_malloc (sizeof (struct ptx_stream)); + + null_stream->stream = NULL; + null_stream->host_thread = pthread_self (); + null_stream->multithreaded = true; + null_stream->d = (CUdeviceptr) NULL; + null_stream->h = NULL; + map_init (null_stream); + ptx_dev->null_stream = null_stream; + + ptx_dev->active_streams = NULL; + pthread_mutex_init (&ptx_dev->stream_lock, NULL); + + if (concurrency < 1) + concurrency = 1; + + /* This is just a guess -- make space for as many async streams as the + current device is capable of concurrently executing. This can grow + later as necessary. No streams are created yet. */ + ptx_dev->async_streams.arr + = GOMP_PLUGIN_malloc (concurrency * sizeof (struct ptx_stream *)); + ptx_dev->async_streams.size = concurrency; + + for (i = 0; i < concurrency; i++) + ptx_dev->async_streams.arr[i] = NULL; +} + +static void +fini_streams_for_device (struct ptx_device *ptx_dev) +{ + free (ptx_dev->async_streams.arr); + + while (ptx_dev->active_streams != NULL) + { + struct ptx_stream *s = ptx_dev->active_streams; + ptx_dev->active_streams = ptx_dev->active_streams->next; + + cuStreamDestroy (s->stream); + map_fini (s); + free (s); + } + + map_fini (ptx_dev->null_stream); + free (ptx_dev->null_stream); +} + +/* Select a stream for (OpenACC-semantics) ASYNC argument for the current + thread THREAD (and also current device/context). If CREATE is true, create + the stream if it does not exist (or use EXISTING if it is non-NULL), and + associate the stream with the same thread argument. Returns stream to use + as result. */ + +static struct ptx_stream * +select_stream_for_async (int async, pthread_t thread, bool create, + CUstream existing) +{ + struct nvptx_thread *nvthd = nvptx_thread (); + /* Local copy of TLS variable. 
*/ + struct ptx_device *ptx_dev = nvthd->ptx_dev; + struct ptx_stream *stream = NULL; + int orig_async = async; + + /* The special value acc_async_noval (-1) maps (for now) to an + implicitly-created stream, which is then handled the same as any other + numbered async stream. Other options are available, e.g. using the null + stream for anonymous async operations, or choosing an idle stream from an + active set. But, stick with this for now. */ + if (async > acc_async_sync) + async++; + + if (create) + pthread_mutex_lock (&ptx_dev->stream_lock); + + /* NOTE: AFAICT there's no particular need for acc_async_sync to map to the + null stream, and in fact better performance may be obtainable if it doesn't + (because the null stream enforces overly-strict synchronisation with + respect to other streams for legacy reasons, and that's probably not + needed with OpenACC). Maybe investigate later. */ + if (async == acc_async_sync) + stream = ptx_dev->null_stream; + else if (async >= 0 && async < ptx_dev->async_streams.size + && ptx_dev->async_streams.arr[async] && !(create && existing)) + stream = ptx_dev->async_streams.arr[async]; + else if (async >= 0 && create) + { + if (async >= ptx_dev->async_streams.size) + { + int i, newsize = ptx_dev->async_streams.size * 2; + + if (async >= newsize) + newsize = async + 1; + + ptx_dev->async_streams.arr + = GOMP_PLUGIN_realloc (ptx_dev->async_streams.arr, + newsize * sizeof (struct ptx_stream *)); + + for (i = ptx_dev->async_streams.size; i < newsize; i++) + ptx_dev->async_streams.arr[i] = NULL; + + ptx_dev->async_streams.size = newsize; + } + + /* Create a new stream on-demand if there isn't one already, or if we're + setting a particular async value to an existing (externally-provided) + stream. 
*/ + if (!ptx_dev->async_streams.arr[async] || existing) + { + CUresult r; + struct ptx_stream *s + = GOMP_PLUGIN_malloc (sizeof (struct ptx_stream)); + + if (existing) + s->stream = existing; + else + { + r = cuStreamCreate (&s->stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuStreamCreate error: %s", cuda_error (r)); + } + + /* If CREATE is true, we're going to be queueing some work on this + stream. Associate it with the current host thread. */ + s->host_thread = thread; + s->multithreaded = false; + + s->d = (CUdeviceptr) NULL; + s->h = NULL; + map_init (s); + + s->next = ptx_dev->active_streams; + ptx_dev->active_streams = s; + ptx_dev->async_streams.arr[async] = s; + } + + stream = ptx_dev->async_streams.arr[async]; + } + else if (async < 0) + GOMP_PLUGIN_fatal ("bad async %d", async); + + if (create) + { + assert (stream != NULL); + + /* If we're trying to use the same stream from different threads + simultaneously, set stream->multithreaded to true. This affects the + behaviour of acc_async_test_all and acc_wait_all, which are supposed to + only wait for asynchronous launches from the same host thread they are + invoked on. If multiple threads use the same async value, we make note + of that here and fall back to testing/waiting for all threads in those + functions. */ + if (thread != stream->host_thread) + stream->multithreaded = true; + + pthread_mutex_unlock (&ptx_dev->stream_lock); + } + else if (stream && !stream->multithreaded + && !pthread_equal (stream->host_thread, thread)) + GOMP_PLUGIN_fatal ("async %d used on wrong thread", orig_async); + + return stream; +} + +static int nvptx_get_num_devices (void); + +/* Initialize the device. 
*/ +static int +nvptx_init (void) +{ + CUresult r; + int rc; + + if (ptx_inited) + return nvptx_get_num_devices (); + + rc = verify_device_library (); + if (rc < 0) + return -1; + + r = cuInit (0); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuInit error: %s", cuda_error (r)); + + ptx_events = NULL; + + pthread_mutex_init (&ptx_event_lock, NULL); + + ptx_inited = true; + + return nvptx_get_num_devices (); +} + +static void +nvptx_fini (void) +{ + ptx_inited = false; +} + +static void * +nvptx_open_device (int n) +{ + struct ptx_device *ptx_dev; + CUdevice dev; + CUresult r; + int async_engines, pi; + + r = cuDeviceGet (&dev, n); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuDeviceGet error: %s", cuda_error (r)); + + ptx_dev = GOMP_PLUGIN_malloc (sizeof (struct ptx_device)); + + ptx_dev->ord = n; + ptx_dev->dev = dev; + ptx_dev->ctx_shared = false; + + r = cuCtxGetCurrent (&ptx_dev->ctx); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuCtxGetCurrent error: %s", cuda_error (r)); + + if (!ptx_dev->ctx) + { + r = cuCtxCreate (&ptx_dev->ctx, CU_CTX_SCHED_AUTO, dev); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuCtxCreate error: %s", cuda_error (r)); + } + else + ptx_dev->ctx_shared = true; + + r = cuDeviceGetAttribute (&pi, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, dev); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuDeviceGetAttribute error: %s", cuda_error (r)); + + ptx_dev->overlap = pi; + + r = cuDeviceGetAttribute (&pi, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuDeviceGetAttribute error: %s", cuda_error (r)); + + ptx_dev->map = pi; + + r = cuDeviceGetAttribute (&pi, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, dev); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuDeviceGetAttribute error: %s", cuda_error (r)); + + ptx_dev->concur = pi; + + r = cuDeviceGetAttribute (&pi, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, dev); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuDeviceGetAttribute error: %s", cuda_error (r)); + + 
ptx_dev->mode = pi; + + r = cuDeviceGetAttribute (&pi, CU_DEVICE_ATTRIBUTE_INTEGRATED, dev); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuDeviceGetAttribute error: %s", cuda_error (r)); + + ptx_dev->mkern = pi; + + r = cuDeviceGetAttribute (&async_engines, + CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, dev); + if (r != CUDA_SUCCESS) + async_engines = 1; + + init_streams_for_device (ptx_dev, async_engines); + + return (void *) ptx_dev; +} + +static int +nvptx_close_device (void *targ_data) +{ + CUresult r; + struct ptx_device *ptx_dev = targ_data; + + if (!ptx_dev) + return 0; + + fini_streams_for_device (ptx_dev); + + if (!ptx_dev->ctx_shared) + { + r = cuCtxDestroy (ptx_dev->ctx); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuCtxDestroy error: %s", cuda_error (r)); + } + + free (ptx_dev); + + return 0; +} + +static int +nvptx_get_num_devices (void) +{ + int n; + CUresult r; + + /* This function will be called before the plugin has been initialized in + order to enumerate available devices, but CUDA API routines can't be used + until cuInit has been called. Just call it now (but don't yet do any + further initialization). 
*/ + if (!ptx_inited) + cuInit (0); + + r = cuDeviceGetCount (&n); + if (r!= CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuDeviceGetCount error: %s", cuda_error (r)); + + return n; +} + + +static void +link_ptx (CUmodule *module, char *ptx_code) +{ + CUjit_option opts[7]; + void *optvals[7]; + float elapsed = 0.0; +#define LOGSIZE 8192 + char elog[LOGSIZE]; + char ilog[LOGSIZE]; + unsigned long logsize = LOGSIZE; + CUlinkState linkstate; + CUresult r; + void *linkout; + size_t linkoutsize __attribute__ ((unused)); + + GOMP_PLUGIN_debug (0, "attempting to load:\n---\n%s\n---\n", ptx_code); + + opts[0] = CU_JIT_WALL_TIME; + optvals[0] = &elapsed; + + opts[1] = CU_JIT_INFO_LOG_BUFFER; + optvals[1] = &ilog[0]; + + opts[2] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + optvals[2] = (void *) logsize; + + opts[3] = CU_JIT_ERROR_LOG_BUFFER; + optvals[3] = &elog[0]; + + opts[4] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + optvals[4] = (void *) logsize; + + opts[5] = CU_JIT_LOG_VERBOSE; + optvals[5] = (void *) 1; + + opts[6] = CU_JIT_TARGET; + optvals[6] = (void *) CU_TARGET_COMPUTE_30; + + r = cuLinkCreate (7, opts, optvals, &linkstate); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuLinkCreate error: %s", cuda_error (r)); + + char *abort_ptx = ABORT_PTX; + r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, abort_ptx, + strlen (abort_ptx) + 1, 0, 0, 0, 0); + if (r != CUDA_SUCCESS) + { + GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]); + GOMP_PLUGIN_fatal ("cuLinkAddData (abort) error: %s", cuda_error (r)); + } + + char *acc_on_device_ptx = ACC_ON_DEVICE_PTX; + r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, acc_on_device_ptx, + strlen (acc_on_device_ptx) + 1, 0, 0, 0, 0); + if (r != CUDA_SUCCESS) + { + GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]); + GOMP_PLUGIN_fatal ("cuLinkAddData (acc_on_device) error: %s", + cuda_error (r)); + } + + char *goacc_internal_ptx = GOACC_INTERNAL_PTX; + r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, goacc_internal_ptx, + strlen (goacc_internal_ptx) + 
1, 0, 0, 0, 0); + if (r != CUDA_SUCCESS) + { + GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]); + GOMP_PLUGIN_fatal ("cuLinkAddData (goacc_internal_ptx) error: %s", + cuda_error (r)); + } + + r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, ptx_code, + strlen (ptx_code) + 1, 0, 0, 0, 0); + if (r != CUDA_SUCCESS) + { + GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]); + GOMP_PLUGIN_fatal ("cuLinkAddData (ptx_code) error: %s", cuda_error (r)); + } + + r = cuLinkComplete (linkstate, &linkout, &linkoutsize); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuLinkComplete error: %s", cuda_error (r)); + + GOMP_PLUGIN_debug (0, "Link complete: %fms\n", elapsed); + GOMP_PLUGIN_debug (0, "Link log %s\n", &ilog[0]); + + r = cuModuleLoadData (module, linkout); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuModuleLoadData error: %s", cuda_error (r)); +} + +static void +event_gc (bool memmap_lockable) +{ + struct ptx_event *ptx_event = ptx_events; + struct nvptx_thread *nvthd = nvptx_thread (); + + pthread_mutex_lock (&ptx_event_lock); + + while (ptx_event != NULL) + { + CUresult r; + struct ptx_event *e = ptx_event; + + ptx_event = ptx_event->next; + + if (e->ord != nvthd->ptx_dev->ord) + continue; + + r = cuEventQuery (*e->evt); + if (r == CUDA_SUCCESS) + { + CUevent *te; + + te = e->evt; + + switch (e->type) + { + case PTX_EVT_MEM: + case PTX_EVT_SYNC: + break; + + case PTX_EVT_KNL: + map_pop (e->addr); + break; + + case PTX_EVT_ASYNC_CLEANUP: + { + /* The function gomp_plugin_async_unmap_vars needs to claim the + memory-map splay tree lock for the current device, so we + can't call it when one of our callers has already claimed + the lock. In that case, just delay the GC for this event + until later. 
*/ + if (!memmap_lockable) + continue; + + GOMP_PLUGIN_async_unmap_vars (e->addr); + } + break; + } + + cuEventDestroy (*te); + free ((void *)te); + + if (ptx_events == e) + ptx_events = ptx_events->next; + else + { + struct ptx_event *e_ = ptx_events; + while (e_->next != e) + e_ = e_->next; + e_->next = e_->next->next; + } + + free (e); + } + } + + pthread_mutex_unlock (&ptx_event_lock); +} + +static void +event_add (enum ptx_event_type type, CUevent *e, void *h) +{ + struct ptx_event *ptx_event; + struct nvptx_thread *nvthd = nvptx_thread (); + + assert (type == PTX_EVT_MEM || type == PTX_EVT_KNL || type == PTX_EVT_SYNC + || type == PTX_EVT_ASYNC_CLEANUP); + + ptx_event = GOMP_PLUGIN_malloc (sizeof (struct ptx_event)); + ptx_event->type = type; + ptx_event->evt = e; + ptx_event->addr = h; + ptx_event->ord = nvthd->ptx_dev->ord; + + pthread_mutex_lock (&ptx_event_lock); + + ptx_event->next = ptx_events; + ptx_events = ptx_event; + + pthread_mutex_unlock (&ptx_event_lock); +} + +void +nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, + size_t *sizes, unsigned short *kinds, int num_gangs, int num_workers, + int vector_length, int async, void *targ_mem_desc) +{ + struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn; + CUfunction function; + CUresult r; + int i; + struct ptx_stream *dev_str; + void *kargs[1]; + void *hp, *dp; + unsigned int nthreads_in_block; + struct nvptx_thread *nvthd = nvptx_thread (); + const char *maybe_abort_msg = "(perhaps abort was called)"; + + function = targ_fn->fn; + + dev_str = select_stream_for_async (async, pthread_self (), false, NULL); + assert (dev_str == nvthd->current_stream); + + /* This reserves a chunk of a pre-allocated page of memory mapped on both + the host and the device. HP is a host pointer to the new chunk, and DP is + the corresponding device pointer. 
*/ + map_push (dev_str, async, mapnum * sizeof (void *), &hp, &dp); + + GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__); + + /* Copy the array of arguments to the mapped page. */ + for (i = 0; i < mapnum; i++) + ((void **) hp)[i] = devaddrs[i]; + + /* Copy the (device) pointers to arguments to the device (dp and hp might in + fact have the same value on a unified-memory system). */ + r = cuMemcpy ((CUdeviceptr)dp, (CUdeviceptr)hp, mapnum * sizeof (void *)); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemcpy failed: %s", cuda_error (r)); + + GOMP_PLUGIN_debug (0, " %s: kernel %s: launch\n", __FUNCTION__, targ_fn->name); + + // OpenACC CUDA + // + // num_gangs blocks + // num_workers warps (where a warp is equivalent to 32 threads) + // vector length threads + // + + /* The openacc vector_length clause 'determines the vector length to use for + vector or SIMD operations'. The question is how to map this to CUDA. + + In CUDA, the warp size is the vector length of a CUDA device. However, the + CUDA interface abstracts away from that, and only shows us warp size + indirectly in maximum number of threads per block, which is a product of + warp size and the number of hyperthreads of a multiprocessor. + + We choose to map openacc vector_length directly onto the number of threads + in a block, in the x dimension. This is reflected in gcc code generation + that uses ThreadIdx.x to access vector elements. + + Attempting to use an openacc vector_length of more than the maximum number + of threads per block will result in a cuda error. 
*/ + nthreads_in_block = vector_length; + + kargs[0] = &dp; + r = cuLaunchKernel (function, + num_gangs, 1, 1, + nthreads_in_block, 1, 1, + 0, dev_str->stream, kargs, 0); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuLaunchKernel error: %s", cuda_error (r)); + +#ifndef DISABLE_ASYNC + if (async < acc_async_noval) + { + r = cuStreamSynchronize (dev_str->stream); + if (r == CUDA_ERROR_LAUNCH_FAILED) + GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s %s\n", cuda_error (r), + maybe_abort_msg); + else if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s", cuda_error (r)); + } + else + { + CUevent *e; + + e = (CUevent *)GOMP_PLUGIN_malloc (sizeof (CUevent)); + + r = cuEventCreate (e, CU_EVENT_DISABLE_TIMING); + if (r == CUDA_ERROR_LAUNCH_FAILED) + GOMP_PLUGIN_fatal ("cuEventCreate error: %s %s\n", cuda_error (r), + maybe_abort_msg); + else if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuEventCreate error: %s", cuda_error (r)); + + event_gc (true); + + r = cuEventRecord (*e, dev_str->stream); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (r)); + + event_add (PTX_EVT_KNL, e, (void *)dev_str); + } +#else + r = cuCtxSynchronize (); + if (r == CUDA_ERROR_LAUNCH_FAILED) + GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s %s\n", cuda_error (r), + maybe_abort_msg); + else if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s", cuda_error (r)); +#endif + + GOMP_PLUGIN_debug (0, " %s: kernel %s: finished\n", __FUNCTION__, + targ_fn->name); + +#ifndef DISABLE_ASYNC + if (async < acc_async_noval) +#endif + map_pop (dev_str); +} + +void * openacc_get_current_cuda_context (void); + +static void * +nvptx_alloc (size_t s) +{ + CUdeviceptr d; + CUresult r; + + r = cuMemAlloc (&d, s); + if (r == CUDA_ERROR_OUT_OF_MEMORY) + return 0; + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemAlloc error: %s", cuda_error (r)); + return (void *)d; +} + +static void +nvptx_free (void *p) +{ + CUresult r; + CUdeviceptr pb; 
+ size_t ps; + + r = cuMemGetAddressRange (&pb, &ps, (CUdeviceptr)p); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemGetAddressRange error: %s", cuda_error (r)); + + if ((CUdeviceptr)p != pb) + GOMP_PLUGIN_fatal ("invalid device address"); + + r = cuMemFree ((CUdeviceptr)p); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemFree error: %s", cuda_error (r)); +} + +static void * +nvptx_host2dev (void *d, const void *h, size_t s) +{ + CUresult r; + CUdeviceptr pb; + size_t ps; + struct nvptx_thread *nvthd = nvptx_thread (); + + if (!s) + return 0; + + if (!d) + GOMP_PLUGIN_fatal ("invalid device address"); + + r = cuMemGetAddressRange (&pb, &ps, (CUdeviceptr)d); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemGetAddressRange error: %s", cuda_error (r)); + + if (!pb) + GOMP_PLUGIN_fatal ("invalid device address"); + + if (!h) + GOMP_PLUGIN_fatal ("invalid host address"); + + if (d == h) + GOMP_PLUGIN_fatal ("invalid host or device address"); + + if ((void *)(d + s) > (void *)(pb + ps)) + GOMP_PLUGIN_fatal ("invalid size"); + +#ifndef DISABLE_ASYNC + if (nvthd->current_stream != nvthd->ptx_dev->null_stream) + { + CUevent *e; + + e = (CUevent *)GOMP_PLUGIN_malloc (sizeof (CUevent)); + + r = cuEventCreate (e, CU_EVENT_DISABLE_TIMING); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuEventCreate error: %s", cuda_error (r)); + + event_gc (false); + + r = cuMemcpyHtoDAsync ((CUdeviceptr)d, h, s, + nvthd->current_stream->stream); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemcpyHtoDAsync error: %s", cuda_error (r)); + + r = cuEventRecord (*e, nvthd->current_stream->stream); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (r)); + + event_add (PTX_EVT_MEM, e, (void *)h); + } + else +#endif + { + r = cuMemcpyHtoD ((CUdeviceptr)d, h, s); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemcpyHtoD error: %s", cuda_error (r)); + } + + return 0; +} + +static void * +nvptx_dev2host (void *h, const void *d, size_t s) +{ + CUresult 
r; + CUdeviceptr pb; + size_t ps; + struct nvptx_thread *nvthd = nvptx_thread (); + + if (!s) + return 0; + + if (!d) + GOMP_PLUGIN_fatal ("invalid device address"); + + r = cuMemGetAddressRange (&pb, &ps, (CUdeviceptr)d); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemGetAddressRange error: %s", cuda_error (r)); + + if (!pb) + GOMP_PLUGIN_fatal ("invalid device address"); + + if (!h) + GOMP_PLUGIN_fatal ("invalid host address"); + + if (d == h) + GOMP_PLUGIN_fatal ("invalid host or device address"); + + if ((void *)(d + s) > (void *)(pb + ps)) + GOMP_PLUGIN_fatal ("invalid size"); + +#ifndef DISABLE_ASYNC + if (nvthd->current_stream != nvthd->ptx_dev->null_stream) + { + CUevent *e; + + e = (CUevent *)GOMP_PLUGIN_malloc (sizeof (CUevent)); + + r = cuEventCreate (e, CU_EVENT_DISABLE_TIMING); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuEventCreate error: %s\n", cuda_error (r)); + + event_gc (false); + + r = cuMemcpyDtoHAsync (h, (CUdeviceptr)d, s, + nvthd->current_stream->stream); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemcpyDtoHAsync error: %s", cuda_error (r)); + + r = cuEventRecord (*e, nvthd->current_stream->stream); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (r)); + + event_add (PTX_EVT_MEM, e, (void *)h); + } + else +#endif + { + r = cuMemcpyDtoH (h, (CUdeviceptr)d, s); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemcpyDtoH error: %s", cuda_error (r)); + } + + return 0; +} + +static void +nvptx_set_async (int async) +{ + struct nvptx_thread *nvthd = nvptx_thread (); + nvthd->current_stream + = select_stream_for_async (async, pthread_self (), true, NULL); +} + +static int +nvptx_async_test (int async) +{ + CUresult r; + struct ptx_stream *s; + + s = select_stream_for_async (async, pthread_self (), false, NULL); + + if (!s) + GOMP_PLUGIN_fatal ("unknown async %d", async); + + r = cuStreamQuery (s->stream); + if (r == CUDA_SUCCESS) + { + /* The oacc-parallel.c:goacc_wait function calls this hook to 
determine + whether all work has completed on this stream, and if so omits the call + to the wait hook. If that happens, event_gc might not get called + (which prevents variables from getting unmapped and their associated + device storage freed), so call it here. */ + event_gc (true); + return 1; + } + else if (r == CUDA_ERROR_NOT_READY) + return 0; + + GOMP_PLUGIN_fatal ("cuStreamQuery error: %s", cuda_error (r)); + + return 0; +} + +static int +nvptx_async_test_all (void) +{ + struct ptx_stream *s; + pthread_t self = pthread_self (); + struct nvptx_thread *nvthd = nvptx_thread (); + + pthread_mutex_lock (&nvthd->ptx_dev->stream_lock); + + for (s = nvthd->ptx_dev->active_streams; s != NULL; s = s->next) + { + if ((s->multithreaded || pthread_equal (s->host_thread, self)) + && cuStreamQuery (s->stream) == CUDA_ERROR_NOT_READY) + { + pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock); + return 0; + } + } + + pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock); + + event_gc (true); + + return 1; +} + +static void +nvptx_wait (int async) +{ + CUresult r; + struct ptx_stream *s; + + s = select_stream_for_async (async, pthread_self (), false, NULL); + + if (!s) + GOMP_PLUGIN_fatal ("unknown async %d", async); + + r = cuStreamSynchronize (s->stream); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s", cuda_error (r)); + + event_gc (true); +} + +static void +nvptx_wait_async (int async1, int async2) +{ + CUresult r; + CUevent *e; + struct ptx_stream *s1, *s2; + pthread_t self = pthread_self (); + + /* The stream that is waiting (rather than being waited for) doesn't + necessarily have to exist already. 
*/ + s2 = select_stream_for_async (async2, self, true, NULL); + + s1 = select_stream_for_async (async1, self, false, NULL); + if (!s1) + GOMP_PLUGIN_fatal ("invalid async 1\n"); + + if (s1 == s2) + GOMP_PLUGIN_fatal ("identical parameters"); + + e = (CUevent *)GOMP_PLUGIN_malloc (sizeof (CUevent)); + + r = cuEventCreate (e, CU_EVENT_DISABLE_TIMING); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuEventCreate error: %s", cuda_error (r)); + + event_gc (true); + + r = cuEventRecord (*e, s1->stream); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (r)); + + event_add (PTX_EVT_SYNC, e, NULL); + + r = cuStreamWaitEvent (s2->stream, *e, 0); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuStreamWaitEvent error: %s", cuda_error (r)); +} + +static void +nvptx_wait_all (void) +{ + CUresult r; + struct ptx_stream *s; + pthread_t self = pthread_self (); + struct nvptx_thread *nvthd = nvptx_thread (); + + pthread_mutex_lock (&nvthd->ptx_dev->stream_lock); + + /* Wait for active streams initiated by this thread (or by multiple threads) + to complete. */ + for (s = nvthd->ptx_dev->active_streams; s != NULL; s = s->next) + { + if (s->multithreaded || pthread_equal (s->host_thread, self)) + { + r = cuStreamQuery (s->stream); + if (r == CUDA_SUCCESS) + continue; + else if (r != CUDA_ERROR_NOT_READY) + GOMP_PLUGIN_fatal ("cuStreamQuery error: %s", cuda_error (r)); + + r = cuStreamSynchronize (s->stream); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s", cuda_error (r)); + } + } + + pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock); + + event_gc (true); +} + +static void +nvptx_wait_all_async (int async) +{ + CUresult r; + struct ptx_stream *waiting_stream, *other_stream; + CUevent *e; + struct nvptx_thread *nvthd = nvptx_thread (); + pthread_t self = pthread_self (); + + /* The stream doing the waiting. This could be the first mention of the + stream, so create it if necessary. 
*/ + waiting_stream + = select_stream_for_async (async, pthread_self (), true, NULL); + + /* Launches on the null stream already block on other streams in the + context. */ + if (!waiting_stream || waiting_stream == nvthd->ptx_dev->null_stream) + return; + + event_gc (true); + + pthread_mutex_lock (&nvthd->ptx_dev->stream_lock); + + for (other_stream = nvthd->ptx_dev->active_streams; + other_stream != NULL; + other_stream = other_stream->next) + { + if (!other_stream->multithreaded + && !pthread_equal (other_stream->host_thread, self)) + continue; + + e = (CUevent *) GOMP_PLUGIN_malloc (sizeof (CUevent)); + + r = cuEventCreate (e, CU_EVENT_DISABLE_TIMING); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuEventCreate error: %s", cuda_error (r)); + + /* Record an event on the waited-for stream. */ + r = cuEventRecord (*e, other_stream->stream); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (r)); + + event_add (PTX_EVT_SYNC, e, NULL); + + r = cuStreamWaitEvent (waiting_stream->stream, *e, 0); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuStreamWaitEvent error: %s", cuda_error (r)); + } + + pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock); +} + +static void * +nvptx_get_current_cuda_device (void) +{ + struct nvptx_thread *nvthd = nvptx_thread (); + + if (!nvthd || !nvthd->ptx_dev) + return NULL; + + return &nvthd->ptx_dev->dev; +} + +static void * +nvptx_get_current_cuda_context (void) +{ + struct nvptx_thread *nvthd = nvptx_thread (); + + if (!nvthd || !nvthd->ptx_dev) + return NULL; + + return nvthd->ptx_dev->ctx; +} + +static void * +nvptx_get_cuda_stream (int async) +{ + struct ptx_stream *s; + struct nvptx_thread *nvthd = nvptx_thread (); + + if (!nvthd || !nvthd->ptx_dev) + return NULL; + + s = select_stream_for_async (async, pthread_self (), false, NULL); + + return s ? 
s->stream : NULL; +} + +static int +nvptx_set_cuda_stream (int async, void *stream) +{ + struct ptx_stream *oldstream; + pthread_t self = pthread_self (); + struct nvptx_thread *nvthd = nvptx_thread (); + + pthread_mutex_lock (&nvthd->ptx_dev->stream_lock); + + if (async < 0) + GOMP_PLUGIN_fatal ("bad async %d", async); + + /* We have a list of active streams and an array mapping async values to + entries of that list. We need to take "ownership" of the passed-in stream, + and add it to our list, removing the previous entry also (if there was one) + in order to prevent resource leaks. Note the potential for surprise + here: maybe we should keep track of passed-in streams and leave it up to + the user to tidy those up, but that doesn't work for stream handles + returned from acc_get_cuda_stream above... */ + + oldstream = select_stream_for_async (async, self, false, NULL); + + if (oldstream) + { + if (nvthd->ptx_dev->active_streams == oldstream) + nvthd->ptx_dev->active_streams = nvthd->ptx_dev->active_streams->next; + else + { + struct ptx_stream *s = nvthd->ptx_dev->active_streams; + while (s->next != oldstream) + s = s->next; + s->next = s->next->next; + } + + cuStreamDestroy (oldstream->stream); + map_fini (oldstream); + free (oldstream); + } + + pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock); + + (void) select_stream_for_async (async, self, true, (CUstream) stream); + + return 1; +} + +/* Plugin entry points. 
*/ + +const char * +GOMP_OFFLOAD_get_name (void) +{ + return "nvptx"; +} + +unsigned int +GOMP_OFFLOAD_get_caps (void) +{ + return GOMP_OFFLOAD_CAP_OPENACC_200; +} + +int +GOMP_OFFLOAD_get_type (void) +{ + return OFFLOAD_TARGET_TYPE_NVIDIA_PTX; +} + +int +GOMP_OFFLOAD_get_num_devices (void) +{ + return nvptx_get_num_devices (); +} + +static void **kernel_target_data; +static void **kernel_host_table; + +void +GOMP_OFFLOAD_register_image (void *host_table, void *target_data) +{ + kernel_target_data = target_data; + kernel_host_table = host_table; +} + +void +GOMP_OFFLOAD_init_device (int n __attribute__ ((unused))) +{ + (void) nvptx_init (); +} + +void +GOMP_OFFLOAD_fini_device (int n __attribute__ ((unused))) +{ + nvptx_fini (); +} + +int +GOMP_OFFLOAD_get_table (int n __attribute__ ((unused)), + struct mapping_table **tablep) +{ + CUmodule module; + void **fn_table; + char **fn_names; + int fn_entries, i; + CUresult r; + struct targ_fn_descriptor *targ_fns; + + if (nvptx_init () <= 0) + return 0; + + /* This isn't an error, because an image may legitimately have no offloaded + regions and so will not call GOMP_offload_register. */ + if (kernel_target_data == NULL) + return 0; + + link_ptx (&module, kernel_target_data[0]); + + /* kernel_target_data[0] -> ptx code + kernel_target_data[1] -> variable mappings + kernel_target_data[2] -> array of kernel names in ascii + + kernel_host_table[0] -> start of function addresses (__offload_func_table) + kernel_host_table[1] -> end of function addresses (__offload_funcs_end) + + The array of kernel names and the functions addresses form a + one-to-one correspondence. 
*/ + + fn_table = kernel_host_table[0]; + fn_names = (char **) kernel_target_data[2]; + fn_entries = (kernel_host_table[1] - kernel_host_table[0]) / sizeof (void *); + + *tablep = GOMP_PLUGIN_malloc (sizeof (struct mapping_table) * fn_entries); + targ_fns = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor) + * fn_entries); + + for (i = 0; i < fn_entries; i++) + { + CUfunction function; + + r = cuModuleGetFunction (&function, module, fn_names[i]); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuModuleGetFunction error: %s", cuda_error (r)); + + targ_fns[i].fn = function; + targ_fns[i].name = (const char *) fn_names[i]; + + (*tablep)[i].host_start = (uintptr_t) fn_table[i]; + (*tablep)[i].host_end = (*tablep)[i].host_start + 1; + (*tablep)[i].tgt_start = (uintptr_t) &targ_fns[i]; + (*tablep)[i].tgt_end = (*tablep)[i].tgt_start + 1; + } + + return fn_entries; +} + +void * +GOMP_OFFLOAD_alloc (int n __attribute__ ((unused)), size_t size) +{ + return nvptx_alloc (size); +} + +void +GOMP_OFFLOAD_free (int n __attribute__ ((unused)), void *ptr) +{ + nvptx_free (ptr); +} + +void * +GOMP_OFFLOAD_dev2host (int ord __attribute__ ((unused)), void *dst, + const void *src, size_t n) +{ + return nvptx_dev2host (dst, src, n); +} + +void * +GOMP_OFFLOAD_host2dev (int ord __attribute__ ((unused)), void *dst, + const void *src, size_t n) +{ + return nvptx_host2dev (dst, src, n); +} + +void (*device_run) (int n, void *fn_ptr, void *vars) = NULL; + +void +GOMP_OFFLOAD_openacc_parallel (void (*fn) (void *), size_t mapnum, + void **hostaddrs, void **devaddrs, size_t *sizes, + unsigned short *kinds, int num_gangs, + int num_workers, int vector_length, int async, + void *targ_mem_desc) +{ + nvptx_exec (fn, mapnum, hostaddrs, devaddrs, sizes, kinds, num_gangs, + num_workers, vector_length, async, targ_mem_desc); +} + +void * +GOMP_OFFLOAD_openacc_open_device (int n) +{ + return nvptx_open_device (n); +} + +int +GOMP_OFFLOAD_openacc_close_device (void *h) +{ + return nvptx_close_device 
(h); +} + +void +GOMP_OFFLOAD_openacc_set_device_num (int n) +{ + struct nvptx_thread *nvthd = nvptx_thread (); + + assert (n >= 0); + + if (!nvthd->ptx_dev || nvthd->ptx_dev->ord != n) + (void) nvptx_open_device (n); +} + +/* This can be called before the device is "opened" for the current thread, in + which case we can't tell which device number should be returned. We don't + actually want to open the device here, so just return -1 and let the caller + (oacc-init.c:acc_get_device_num) handle it. */ + +int +GOMP_OFFLOAD_openacc_get_device_num (void) +{ + struct nvptx_thread *nvthd = nvptx_thread (); + + if (nvthd && nvthd->ptx_dev) + return nvthd->ptx_dev->ord; + else + return -1; +} + +void +GOMP_OFFLOAD_openacc_register_async_cleanup (void *targ_mem_desc) +{ + CUevent *e; + CUresult r; + struct nvptx_thread *nvthd = nvptx_thread (); + + e = (CUevent *) GOMP_PLUGIN_malloc (sizeof (CUevent)); + + r = cuEventCreate (e, CU_EVENT_DISABLE_TIMING); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuEventCreate error: %s", cuda_error (r)); + + r = cuEventRecord (*e, nvthd->current_stream->stream); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (r)); + + event_add (PTX_EVT_ASYNC_CLEANUP, e, targ_mem_desc); +} + +int +GOMP_OFFLOAD_openacc_async_test (int async) +{ + return nvptx_async_test (async); +} + +int +GOMP_OFFLOAD_openacc_async_test_all (void) +{ + return nvptx_async_test_all (); +} + +void +GOMP_OFFLOAD_openacc_async_wait (int async) +{ + nvptx_wait (async); +} + +void +GOMP_OFFLOAD_openacc_async_wait_async (int async1, int async2) +{ + nvptx_wait_async (async1, async2); +} + +void +GOMP_OFFLOAD_openacc_async_wait_all (void) +{ + nvptx_wait_all (); +} + +void +GOMP_OFFLOAD_openacc_async_wait_all_async (int async) +{ + nvptx_wait_all_async (async); +} + +void +GOMP_OFFLOAD_openacc_async_set_async (int async) +{ + nvptx_set_async (async); +} + +void * +GOMP_OFFLOAD_openacc_create_thread_data (void *targ_data) +{ + struct 
ptx_device *ptx_dev = (struct ptx_device *) targ_data; + struct nvptx_thread *nvthd + = GOMP_PLUGIN_malloc (sizeof (struct nvptx_thread)); + CUresult r; + CUcontext thd_ctx; + + r = cuCtxGetCurrent (&thd_ctx); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuCtxGetCurrent error: %s", cuda_error (r)); + + assert (ptx_dev->ctx); + + if (!thd_ctx) + { + r = cuCtxPushCurrent (ptx_dev->ctx); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuCtxPushCurrent error: %s", cuda_error (r)); + } + + nvthd->current_stream = ptx_dev->null_stream; + nvthd->ptx_dev = ptx_dev; + + return (void *) nvthd; +} + +void +GOMP_OFFLOAD_openacc_destroy_thread_data (void *data) +{ + free (data); +} + +void * +GOMP_OFFLOAD_openacc_get_current_cuda_device (void) +{ + return nvptx_get_current_cuda_device (); +} + +void * +GOMP_OFFLOAD_openacc_get_current_cuda_context (void) +{ + return nvptx_get_current_cuda_context (); +} + +/* NOTE: This returns a CUstream, not a ptx_stream pointer. */ + +void * +GOMP_OFFLOAD_openacc_get_cuda_stream (int async) +{ + return nvptx_get_cuda_stream (async); +} + +/* NOTE: This takes a CUstream, not a ptx_stream pointer. */ + +int +GOMP_OFFLOAD_openacc_set_cuda_stream (int async, void *stream) +{ + return nvptx_set_cuda_stream (async, stream); +} diff --git a/libgomp/splay-tree.c b/libgomp/splay-tree.c new file mode 100644 index 00000000000..030ca8f6c45 --- /dev/null +++ b/libgomp/splay-tree.c @@ -0,0 +1,217 @@ +/* A splay-tree datatype. + Copyright (C) 1998-2015 Free Software Foundation, Inc. + Contributed by Mark Mitchell (mark@markmitchell.com). + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* The splay tree code copied from include/splay-tree.h and adjusted, + so that all the data lives directly in splay_tree_node_s structure + and no extra allocations are needed. */ + +/* For an easily readable description of splay-trees, see: + + Lewis, Harry R. and Denenberg, Larry. Data Structures and Their + Algorithms. Harper-Collins, Inc. 1991. + + The major feature of splay trees is that all basic tree operations + are amortized O(log n) time for a tree with n nodes. */ + +#include "libgomp.h" +#include "splay-tree.h" + +extern int splay_compare (splay_tree_key, splay_tree_key); + +/* Rotate the edge joining the left child N with its parent P. PP is the + grandparents' pointer to P. */ + +static inline void +rotate_left (splay_tree_node *pp, splay_tree_node p, splay_tree_node n) +{ + splay_tree_node tmp; + tmp = n->right; + n->right = p; + p->left = tmp; + *pp = n; +} + +/* Rotate the edge joining the right child N with its parent P. PP is the + grandparents' pointer to P. */ + +static inline void +rotate_right (splay_tree_node *pp, splay_tree_node p, splay_tree_node n) +{ + splay_tree_node tmp; + tmp = n->left; + n->left = p; + p->right = tmp; + *pp = n; +} + +/* Bottom up splay of KEY. 
*/ + +static void +splay_tree_splay (splay_tree sp, splay_tree_key key) +{ + if (sp->root == NULL) + return; + + do { + int cmp1, cmp2; + splay_tree_node n, c; + + n = sp->root; + cmp1 = splay_compare (key, &n->key); + + /* Found. */ + if (cmp1 == 0) + return; + + /* Left or right? If no child, then we're done. */ + if (cmp1 < 0) + c = n->left; + else + c = n->right; + if (!c) + return; + + /* Next one left or right? If found or no child, we're done + after one rotation. */ + cmp2 = splay_compare (key, &c->key); + if (cmp2 == 0 + || (cmp2 < 0 && !c->left) + || (cmp2 > 0 && !c->right)) + { + if (cmp1 < 0) + rotate_left (&sp->root, n, c); + else + rotate_right (&sp->root, n, c); + return; + } + + /* Now we have the four cases of double-rotation. */ + if (cmp1 < 0 && cmp2 < 0) + { + rotate_left (&n->left, c, c->left); + rotate_left (&sp->root, n, n->left); + } + else if (cmp1 > 0 && cmp2 > 0) + { + rotate_right (&n->right, c, c->right); + rotate_right (&sp->root, n, n->right); + } + else if (cmp1 < 0 && cmp2 > 0) + { + rotate_right (&n->left, c, c->right); + rotate_left (&sp->root, n, n->left); + } + else if (cmp1 > 0 && cmp2 < 0) + { + rotate_left (&n->right, c, c->left); + rotate_right (&sp->root, n, n->right); + } + } while (1); +} + +/* Insert a new NODE into SP. The NODE shouldn't exist in the tree. */ + +attribute_hidden void +splay_tree_insert (splay_tree sp, splay_tree_node node) +{ + int comparison = 0; + + splay_tree_splay (sp, &node->key); + + if (sp->root) + comparison = splay_compare (&sp->root->key, &node->key); + + if (sp->root && comparison == 0) + gomp_fatal ("Duplicate node"); + else + { + /* Insert it at the root. 
*/ + if (sp->root == NULL) + node->left = node->right = NULL; + else if (comparison < 0) + { + node->left = sp->root; + node->right = node->left->right; + node->left->right = NULL; + } + else + { + node->right = sp->root; + node->left = node->right->left; + node->right->left = NULL; + } + + sp->root = node; + } +} + +/* Remove node with KEY from SP. It is not an error if it did not exist. */ + +attribute_hidden void +splay_tree_remove (splay_tree sp, splay_tree_key key) +{ + splay_tree_splay (sp, key); + + if (sp->root && splay_compare (&sp->root->key, key) == 0) + { + splay_tree_node left, right; + + left = sp->root->left; + right = sp->root->right; + + /* One of the children is now the root. Doesn't matter much + which, so long as we preserve the properties of the tree. */ + if (left) + { + sp->root = left; + + /* If there was a right child as well, hang it off the + right-most leaf of the left child. */ + if (right) + { + while (left->right) + left = left->right; + left->right = right; + } + } + else + sp->root = right; + } +} + +/* Lookup KEY in SP, returning NODE if present, and NULL + otherwise. */ + +attribute_hidden splay_tree_key +splay_tree_lookup (splay_tree sp, splay_tree_key key) +{ + splay_tree_splay (sp, key); + + if (sp->root && splay_compare (&sp->root->key, key) == 0) + return &sp->root->key; + else + return NULL; +} diff --git a/libgomp/splay-tree.h b/libgomp/splay-tree.h index 1296be65f87..085021cf3d6 100644 --- a/libgomp/splay-tree.h +++ b/libgomp/splay-tree.h @@ -43,6 +43,9 @@ typedef struct splay_tree_key_s *splay_tree_key; The major feature of splay trees is that all basic tree operations are amortized O(log n) time for a tree with n nodes. */ +#ifndef _SPLAY_TREE_H +#define _SPLAY_TREE_H 1 + /* The nodes in the splay tree. */ struct splay_tree_node_s { struct splay_tree_key_s key; @@ -56,177 +59,8 @@ struct splay_tree_s { splay_tree_node root; }; -/* Rotate the edge joining the left child N with its parent P. 
PP is the - grandparents' pointer to P. */ +extern splay_tree_key splay_tree_lookup (splay_tree, splay_tree_key); +extern void splay_tree_insert (splay_tree, splay_tree_node); +extern void splay_tree_remove (splay_tree, splay_tree_key); -static inline void -rotate_left (splay_tree_node *pp, splay_tree_node p, splay_tree_node n) -{ - splay_tree_node tmp; - tmp = n->right; - n->right = p; - p->left = tmp; - *pp = n; -} - -/* Rotate the edge joining the right child N with its parent P. PP is the - grandparents' pointer to P. */ - -static inline void -rotate_right (splay_tree_node *pp, splay_tree_node p, splay_tree_node n) -{ - splay_tree_node tmp; - tmp = n->left; - n->left = p; - p->right = tmp; - *pp = n; -} - -/* Bottom up splay of KEY. */ - -static void -splay_tree_splay (splay_tree sp, splay_tree_key key) -{ - if (sp->root == NULL) - return; - - do { - int cmp1, cmp2; - splay_tree_node n, c; - - n = sp->root; - cmp1 = splay_compare (key, &n->key); - - /* Found. */ - if (cmp1 == 0) - return; - - /* Left or right? If no child, then we're done. */ - if (cmp1 < 0) - c = n->left; - else - c = n->right; - if (!c) - return; - - /* Next one left or right? If found or no child, we're done - after one rotation. */ - cmp2 = splay_compare (key, &c->key); - if (cmp2 == 0 - || (cmp2 < 0 && !c->left) - || (cmp2 > 0 && !c->right)) - { - if (cmp1 < 0) - rotate_left (&sp->root, n, c); - else - rotate_right (&sp->root, n, c); - return; - } - - /* Now we have the four cases of double-rotation. 
*/ - if (cmp1 < 0 && cmp2 < 0) - { - rotate_left (&n->left, c, c->left); - rotate_left (&sp->root, n, n->left); - } - else if (cmp1 > 0 && cmp2 > 0) - { - rotate_right (&n->right, c, c->right); - rotate_right (&sp->root, n, n->right); - } - else if (cmp1 < 0 && cmp2 > 0) - { - rotate_right (&n->left, c, c->right); - rotate_left (&sp->root, n, n->left); - } - else if (cmp1 > 0 && cmp2 < 0) - { - rotate_left (&n->right, c, c->left); - rotate_right (&sp->root, n, n->right); - } - } while (1); -} - -/* Insert a new NODE into SP. The NODE shouldn't exist in the tree. */ - -static void -splay_tree_insert (splay_tree sp, splay_tree_node node) -{ - int comparison = 0; - - splay_tree_splay (sp, &node->key); - - if (sp->root) - comparison = splay_compare (&sp->root->key, &node->key); - - if (sp->root && comparison == 0) - abort (); - else - { - /* Insert it at the root. */ - if (sp->root == NULL) - node->left = node->right = NULL; - else if (comparison < 0) - { - node->left = sp->root; - node->right = node->left->right; - node->left->right = NULL; - } - else - { - node->right = sp->root; - node->left = node->right->left; - node->right->left = NULL; - } - - sp->root = node; - } -} - -/* Remove node with KEY from SP. It is not an error if it did not exist. */ - -static void -splay_tree_remove (splay_tree sp, splay_tree_key key) -{ - splay_tree_splay (sp, key); - - if (sp->root && splay_compare (&sp->root->key, key) == 0) - { - splay_tree_node left, right; - - left = sp->root->left; - right = sp->root->right; - - /* One of the children is now the root. Doesn't matter much - which, so long as we preserve the properties of the tree. */ - if (left) - { - sp->root = left; - - /* If there was a right child as well, hang it off the - right-most leaf of the left child. */ - if (right) - { - while (left->right) - left = left->right; - left->right = right; - } - } - else - sp->root = right; - } -} - -/* Lookup KEY in SP, returning NODE if present, and NULL - otherwise. 
*/ - -static splay_tree_key -splay_tree_lookup (splay_tree sp, splay_tree_key key) -{ - splay_tree_splay (sp, key); - - if (sp->root && splay_compare (&sp->root->key, key) == 0) - return &sp->root->key; - else - return NULL; -} +#endif /* _SPLAY_TREE_H */ diff --git a/libgomp/target.c b/libgomp/target.c index ec097de9ca2..83ad51108d0 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -27,11 +27,14 @@ #include "config.h" #include "libgomp.h" -#include "libgomp_target.h" +#include "oacc-plugin.h" +#include "oacc-int.h" +#include "gomp-constants.h" #include #include #include #include +#include #ifdef PLUGIN_SUPPORT #include @@ -39,52 +42,9 @@ static void gomp_target_init (void); +/* The whole initialization code for offloading plugins is only run once. */ static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT; -/* Forward declaration for a node in the tree. */ -typedef struct splay_tree_node_s *splay_tree_node; -typedef struct splay_tree_s *splay_tree; -typedef struct splay_tree_key_s *splay_tree_key; - -struct target_mem_desc { - /* Reference count. */ - uintptr_t refcount; - /* All the splay nodes allocated together. */ - splay_tree_node array; - /* Start of the target region. */ - uintptr_t tgt_start; - /* End of the targer region. */ - uintptr_t tgt_end; - /* Handle to free. */ - void *to_free; - /* Previous target_mem_desc. */ - struct target_mem_desc *prev; - /* Number of items in following list. */ - size_t list_count; - - /* Corresponding target device descriptor. */ - struct gomp_device_descr *device_descr; - - /* List of splay keys to remove (or decrease refcount) - at the end of region. */ - splay_tree_key list[]; -}; - -struct splay_tree_key_s { - /* Address of the host object. */ - uintptr_t host_start; - /* Address immediately after the host object. */ - uintptr_t host_end; - /* Descriptor of the target memory. */ - struct target_mem_desc *tgt; - /* Offset from tgt->tgt_start to the start of the target object. 
*/ - uintptr_t tgt_offset; - /* Reference count. */ - uintptr_t refcount; - /* True if data should be copied from device to host at the end. */ - bool copy_from; -}; - /* This structure describes an offload image. It contains type of the target device, pointer to host table descriptor, and pointer to target data. */ @@ -106,9 +66,12 @@ static struct gomp_device_descr *devices; /* Total number of available devices. */ static int num_devices; +/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */ +static int num_devices_openmp; + /* The comparison function. */ -static int +attribute_hidden int splay_compare (splay_tree_key x, splay_tree_key y) { if (x->host_start == x->host_end @@ -123,54 +86,23 @@ splay_compare (splay_tree_key x, splay_tree_key y) #include "splay-tree.h" -/* This structure describes accelerator device. - It contains ID-number of the device, its type, function handlers for - interaction with the device, and information about mapped memory. */ -struct gomp_device_descr +attribute_hidden void +gomp_init_targets_once (void) { - /* This is the ID number of device. It could be specified in DEVICE-clause of - TARGET construct. */ - int id; - - /* This is the ID number of device among devices of the same type. */ - int target_id; - - /* This is the TYPE of device. */ - enum offload_target_type type; - - /* Set to true when device is initialized. */ - bool is_initialized; - - /* Function handlers. */ - int (*get_type_func) (void); - int (*get_num_devices_func) (void); - void (*register_image_func) (void *, void *); - void (*init_device_func) (int); - int (*get_table_func) (int, void *); - void *(*alloc_func) (int, size_t); - void (*free_func) (int, void *); - void *(*host2dev_func) (int, void *, const void *, size_t); - void *(*dev2host_func) (int, void *, const void *, size_t); - void (*run_func) (int, void *, void *); - - /* Splay tree containing information about mapped memory regions. 
*/ - struct splay_tree_s dev_splay_tree; - - /* Mutex for operating with the splay tree and other shared structures. */ - gomp_mutex_t dev_env_lock; -}; + (void) pthread_once (&gomp_is_initialized, gomp_target_init); +} attribute_hidden int gomp_get_num_devices (void) { - (void) pthread_once (&gomp_is_initialized, gomp_target_init); - return num_devices; + gomp_init_targets_once (); + return num_devices_openmp; } static struct gomp_device_descr * resolve_device (int device_id) { - if (device_id == -1) + if (device_id == GOMP_DEVICE_ICV) { struct gomp_task_icv *icv = gomp_icv (false); device_id = icv->default_device_var; @@ -190,27 +122,39 @@ static inline void gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn, unsigned char kind) { - if (oldn->host_start > newn->host_start + if ((kind & GOMP_MAP_FLAG_FORCE) + || oldn->host_start > newn->host_start || oldn->host_end < newn->host_end) - gomp_fatal ("Trying to map into device [%p..%p) object when" + gomp_fatal ("Trying to map into device [%p..%p) object when " "[%p..%p) is already mapped", (void *) newn->host_start, (void *) newn->host_end, (void *) oldn->host_start, (void *) oldn->host_end); oldn->refcount++; } -static struct target_mem_desc * +static int +get_kind (bool is_openacc, void *kinds, int idx) +{ + return is_openacc ? ((unsigned short *) kinds)[idx] + : ((unsigned char *) kinds)[idx]; +} + +attribute_hidden struct target_mem_desc * gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, - void **hostaddrs, size_t *sizes, unsigned char *kinds, - bool is_target) + void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, + bool is_openacc, bool is_target) { size_t i, tgt_align, tgt_size, not_found_cnt = 0; + const int rshift = is_openacc ? 8 : 3; + const int typemask = is_openacc ? 
0xff : 0x7; + struct gomp_memory_mapping *mm = &devicep->mem_map; struct splay_tree_key_s cur_node; struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); tgt->list_count = mapnum; tgt->refcount = 1; tgt->device_descr = devicep; + tgt->mem_map = mm; if (mapnum == 0) return tgt; @@ -224,40 +168,43 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, tgt_size = mapnum * sizeof (void *); } - gomp_mutex_lock (&devicep->dev_env_lock); + gomp_mutex_lock (&mm->lock); + for (i = 0; i < mapnum; i++) { + int kind = get_kind (is_openacc, kinds, i); if (hostaddrs[i] == NULL) { tgt->list[i] = NULL; continue; } cur_node.host_start = (uintptr_t) hostaddrs[i]; - if ((kinds[i] & 7) != 4) + if (!GOMP_MAP_POINTER_P (kind & typemask)) cur_node.host_end = cur_node.host_start + sizes[i]; else cur_node.host_end = cur_node.host_start + sizeof (void *); - splay_tree_key n = splay_tree_lookup (&devicep->dev_splay_tree, - &cur_node); + splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node); if (n) { tgt->list[i] = n; - gomp_map_vars_existing (n, &cur_node, kinds[i]); + gomp_map_vars_existing (n, &cur_node, kind & typemask); } else { - size_t align = (size_t) 1 << (kinds[i] >> 3); tgt->list[i] = NULL; + + size_t align = (size_t) 1 << (kind >> rshift); not_found_cnt++; if (tgt_align < align) tgt_align = align; tgt_size = (tgt_size + align - 1) & ~(align - 1); tgt_size += cur_node.host_end - cur_node.host_start; - if ((kinds[i] & 7) == 5) + if ((kind & typemask) == GOMP_MAP_TO_PSET) { size_t j; for (j = i + 1; j < mapnum; j++) - if ((kinds[j] & 7) != 4) + if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) + & typemask)) break; else if ((uintptr_t) hostaddrs[j] < cur_node.host_start || ((uintptr_t) hostaddrs[j] + sizeof (void *) @@ -272,7 +219,15 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } } - if (not_found_cnt || is_target) + if (devaddrs) + { + if (mapnum != 1) + gomp_fatal ("unexpected 
aggregation"); + tgt->to_free = devaddrs[0]; + tgt->tgt_start = (uintptr_t) tgt->to_free; + tgt->tgt_end = tgt->tgt_start + sizes[0]; + } + else if (not_found_cnt || is_target) { /* Allocate tgt_align aligned tgt_size block of memory. */ /* FIXME: Perhaps change interface to allocate properly aligned @@ -304,44 +259,47 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, for (i = 0; i < mapnum; i++) if (tgt->list[i] == NULL) { + int kind = get_kind (is_openacc, kinds, i); if (hostaddrs[i] == NULL) continue; splay_tree_key k = &array->key; k->host_start = (uintptr_t) hostaddrs[i]; - if ((kinds[i] & 7) != 4) + if (!GOMP_MAP_POINTER_P (kind & typemask)) k->host_end = k->host_start + sizes[i]; else k->host_end = k->host_start + sizeof (void *); - splay_tree_key n - = splay_tree_lookup (&devicep->dev_splay_tree, k); + splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k); if (n) { tgt->list[i] = n; - gomp_map_vars_existing (n, k, kinds[i]); + gomp_map_vars_existing (n, k, kind & typemask); } else { - size_t align = (size_t) 1 << (kinds[i] >> 3); + size_t align = (size_t) 1 << (kind >> rshift); tgt->list[i] = k; tgt_size = (tgt_size + align - 1) & ~(align - 1); k->tgt = tgt; k->tgt_offset = tgt_size; tgt_size += k->host_end - k->host_start; - k->copy_from = false; - if ((kinds[i] & 7) == 2 || (kinds[i] & 7) == 3) - k->copy_from = true; + k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); k->refcount = 1; + k->async_refcount = 0; tgt->refcount++; array->left = NULL; array->right = NULL; - splay_tree_insert (&devicep->dev_splay_tree, array); - switch (kinds[i] & 7) + splay_tree_insert (&mm->splay_tree, array); + switch (kind & typemask) { - case 0: /* ALLOC */ - case 2: /* FROM */ + case GOMP_MAP_ALLOC: + case GOMP_MAP_FROM: + case GOMP_MAP_FORCE_ALLOC: + case GOMP_MAP_FORCE_FROM: break; - case 1: /* TO */ - case 3: /* TOFROM */ + case GOMP_MAP_TO: + case GOMP_MAP_TOFROM: + case GOMP_MAP_FORCE_TO: + case GOMP_MAP_FORCE_TOFROM: /* FIXME: Perhaps add 
some smarts, like if copying several adjacent fields from host to target, use some host buffer to avoid sending each var individually. */ @@ -351,12 +309,13 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, (void *) k->host_start, k->host_end - k->host_start); break; - case 4: /* POINTER */ + case GOMP_MAP_POINTER: cur_node.host_start = (uintptr_t) *(void **) k->host_start; if (cur_node.host_start == (uintptr_t) NULL) { cur_node.tgt_offset = (uintptr_t) NULL; + /* FIXME: see above FIXME comment. */ devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start + k->tgt_offset), @@ -367,19 +326,16 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, /* Add bias to the pointer value. */ cur_node.host_start += sizes[i]; cur_node.host_end = cur_node.host_start + 1; - n = splay_tree_lookup (&devicep->dev_splay_tree, - &cur_node); + n = splay_tree_lookup (&mm->splay_tree, &cur_node); if (n == NULL) { /* Could be possibly zero size array section. */ cur_node.host_end--; - n = splay_tree_lookup (&devicep->dev_splay_tree, - &cur_node); + n = splay_tree_lookup (&mm->splay_tree, &cur_node); if (n == NULL) { cur_node.host_start--; - n = splay_tree_lookup (&devicep->dev_splay_tree, - &cur_node); + n = splay_tree_lookup (&mm->splay_tree, &cur_node); cur_node.host_start++; } } @@ -393,20 +349,24 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, array section. Now subtract bias to get what we want to initialize the pointer with. */ cur_node.tgt_offset -= sizes[i]; + /* FIXME: see above FIXME comment. */ devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start + k->tgt_offset), (void *) &cur_node.tgt_offset, sizeof (void *)); break; - case 5: /* TO_PSET */ + case GOMP_MAP_TO_PSET: + /* FIXME: see above FIXME comment. 
*/ devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, k->host_end - k->host_start); + for (j = i + 1; j < mapnum; j++) - if ((kinds[j] & 7) != 4) + if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) + & typemask)) break; else if ((uintptr_t) hostaddrs[j] < k->host_start || ((uintptr_t) hostaddrs[j] + sizeof (void *) @@ -421,6 +381,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, if (cur_node.host_start == (uintptr_t) NULL) { cur_node.tgt_offset = (uintptr_t) NULL; + /* FIXME: see above FIXME comment. */ devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start + k->tgt_offset + ((uintptr_t) hostaddrs[j] @@ -433,19 +394,18 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, /* Add bias to the pointer value. */ cur_node.host_start += sizes[j]; cur_node.host_end = cur_node.host_start + 1; - n = splay_tree_lookup (&devicep->dev_splay_tree, - &cur_node); + n = splay_tree_lookup (&mm->splay_tree, &cur_node); if (n == NULL) { /* Could be possibly zero size array section. */ cur_node.host_end--; - n = splay_tree_lookup (&devicep->dev_splay_tree, + n = splay_tree_lookup (&mm->splay_tree, &cur_node); if (n == NULL) { cur_node.host_start--; - n = splay_tree_lookup - (&devicep->dev_splay_tree, &cur_node); + n = splay_tree_lookup (&mm->splay_tree, + &cur_node); cur_node.host_start++; } } @@ -460,6 +420,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, array section. Now subtract bias to get what we want to initialize the pointer with. */ cur_node.tgt_offset -= sizes[j]; + /* FIXME: see above FIXME comment. 
*/ + devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start + k->tgt_offset + ((uintptr_t) hostaddrs[j] @@ -468,12 +429,35 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, sizeof (void *)); i++; } - break; + break; + case GOMP_MAP_FORCE_PRESENT: + { + /* We already looked up the memory region above and it + was missing. */ + size_t size = k->host_end - k->host_start; + gomp_fatal ("present clause: !acc_is_present (%p, " + "%zd (0x%zx))", (void *) k->host_start, + size, size); + } + break; + case GOMP_MAP_FORCE_DEVICEPTR: + assert (k->host_end - k->host_start == sizeof (void *)); + + devicep->host2dev_func (devicep->target_id, + (void *) (tgt->tgt_start + + k->tgt_offset), + (void *) k->host_start, + sizeof (void *)); + break; + default: + gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__, + kind); } array++; } } } + if (is_target) { for (i = 0; i < mapnum; i++) @@ -483,6 +467,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, else cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start + tgt->list[i]->tgt_offset; + /* FIXME: see above FIXME comment. */ devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start + i * sizeof (void *)), @@ -491,7 +476,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } } - gomp_mutex_unlock (&devicep->dev_env_lock); + gomp_mutex_unlock (&mm->lock); return tgt; } @@ -506,24 +491,29 @@ gomp_unmap_tgt (struct target_mem_desc *tgt) free (tgt); } -static void -gomp_unmap_vars (struct target_mem_desc *tgt) +/* Decrease the refcount for a set of mapped variables, and queue asynchronous + copies from the device back to the host after any work that has been issued. + Because the regions are still "live", increment an asynchronous reference + count to indicate that they should not be unmapped from host-side data + structures until the asynchronous copy has completed. 
*/ + +attribute_hidden void +gomp_copy_from_async (struct target_mem_desc *tgt) { struct gomp_device_descr *devicep = tgt->device_descr; - - if (tgt->list_count == 0) - { - free (tgt); - return; - } - + struct gomp_memory_mapping *mm = tgt->mem_map; size_t i; - gomp_mutex_lock (&devicep->dev_env_lock); + + gomp_mutex_lock (&mm->lock); + for (i = 0; i < tgt->list_count; i++) if (tgt->list[i] == NULL) ; else if (tgt->list[i]->refcount > 1) - tgt->list[i]->refcount--; + { + tgt->list[i]->refcount--; + tgt->list[i]->async_refcount++; + } else { splay_tree_key k = tgt->list[i]; @@ -531,7 +521,45 @@ gomp_unmap_vars (struct target_mem_desc *tgt) devicep->dev2host_func (devicep->target_id, (void *) k->host_start, (void *) (k->tgt->tgt_start + k->tgt_offset), k->host_end - k->host_start); - splay_tree_remove (&devicep->dev_splay_tree, k); + } + + gomp_mutex_unlock (&mm->lock); +} + +/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant + variables back from device to host: if it is false, it is assumed that this + has been done already, i.e. by gomp_copy_from_async above. 
*/ + +attribute_hidden void +gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) +{ + struct gomp_device_descr *devicep = tgt->device_descr; + struct gomp_memory_mapping *mm = tgt->mem_map; + + if (tgt->list_count == 0) + { + free (tgt); + return; + } + + gomp_mutex_lock (&mm->lock); + + size_t i; + for (i = 0; i < tgt->list_count; i++) + if (tgt->list[i] == NULL) + ; + else if (tgt->list[i]->refcount > 1) + tgt->list[i]->refcount--; + else if (tgt->list[i]->async_refcount > 0) + tgt->list[i]->async_refcount--; + else + { + splay_tree_key k = tgt->list[i]; + if (k->copy_from && do_copyfrom) + devicep->dev2host_func (devicep->target_id, (void *) k->host_start, + (void *) (k->tgt->tgt_start + k->tgt_offset), + k->host_end - k->host_start); + splay_tree_remove (&mm->splay_tree, k); if (k->tgt->refcount > 1) k->tgt->refcount--; else @@ -542,15 +570,18 @@ gomp_unmap_vars (struct target_mem_desc *tgt) tgt->refcount--; else gomp_unmap_tgt (tgt); - gomp_mutex_unlock (&devicep->dev_env_lock); + + gomp_mutex_unlock (&mm->lock); } static void -gomp_update (struct gomp_device_descr *devicep, size_t mapnum, - void **hostaddrs, size_t *sizes, unsigned char *kinds) +gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm, + size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds, + bool is_openacc) { size_t i; struct splay_tree_key_s cur_node; + const int typemask = is_openacc ? 
0xff : 0x7; if (!devicep) return; @@ -558,16 +589,17 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, if (mapnum == 0) return; - gomp_mutex_lock (&devicep->dev_env_lock); + gomp_mutex_lock (&mm->lock); for (i = 0; i < mapnum; i++) if (sizes[i]) { cur_node.host_start = (uintptr_t) hostaddrs[i]; cur_node.host_end = cur_node.host_start + sizes[i]; - splay_tree_key n = splay_tree_lookup (&devicep->dev_splay_tree, + splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node); if (n) { + int kind = get_kind (is_openacc, kinds, i); if (n->host_start > cur_node.host_start || n->host_end < cur_node.host_end) gomp_fatal ("Trying to update [%p..%p) object when" @@ -576,7 +608,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, (void *) cur_node.host_end, (void *) n->host_start, (void *) n->host_end); - if ((kinds[i] & 7) == 1) + if (GOMP_MAP_COPY_TO_P (kind & typemask)) devicep->host2dev_func (devicep->target_id, (void *) (n->tgt->tgt_start + n->tgt_offset @@ -584,7 +616,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, - n->host_start), (void *) cur_node.host_start, cur_node.host_end - cur_node.host_start); - else if ((kinds[i] & 7) == 2) + if (GOMP_MAP_COPY_FROM_P (kind & typemask)) devicep->dev2host_func (devicep->target_id, (void *) cur_node.host_start, (void *) (n->tgt->tgt_start @@ -598,7 +630,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, (void *) cur_node.host_start, (void *) cur_node.host_end); } - gomp_mutex_unlock (&devicep->dev_env_lock); + gomp_mutex_unlock (&mm->lock); } /* This function should be called from every offload image. @@ -620,13 +652,23 @@ GOMP_offload_register (void *host_table, enum offload_target_type target_type, num_offload_images++; } -/* This function initializes the target device, specified by DEVICEP. */ +/* This function initializes the target device, specified by DEVICEP. DEVICEP + must be locked on entry, and remains locked on return. 
*/ -static void +attribute_hidden void gomp_init_device (struct gomp_device_descr *devicep) { devicep->init_device_func (devicep->target_id); + devicep->is_initialized = true; +} +/* Initialize address mapping tables. MM must be locked on entry, and remains + locked on return. */ + +attribute_hidden void +gomp_init_tables (struct gomp_device_descr *devicep, + struct gomp_memory_mapping *mm) +{ /* Get address mapping table for device. */ struct mapping_table *table = NULL; int num_entries = devicep->get_table_func (devicep->target_id, &table); @@ -653,30 +695,63 @@ gomp_init_device (struct gomp_device_descr *devicep) k->tgt = tgt; node->left = NULL; node->right = NULL; - splay_tree_insert (&devicep->dev_splay_tree, node); + splay_tree_insert (&mm->splay_tree, node); } free (table); - devicep->is_initialized = true; + mm->is_initialized = true; +} + +/* Free address mapping tables. MM must be locked on entry, and remains locked + on return. */ + +attribute_hidden void +gomp_free_memmap (struct gomp_memory_mapping *mm) +{ + while (mm->splay_tree.root) + { + struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt; + + splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key); + free (tgt->array); + free (tgt); + } + + mm->is_initialized = false; +} + +/* This function de-initializes the target device, specified by DEVICEP. + DEVICEP must be locked on entry, and remains locked on return. */ + +attribute_hidden void +gomp_fini_device (struct gomp_device_descr *devicep) +{ + if (devicep->is_initialized) + devicep->fini_device_func (devicep->target_id); + + devicep->is_initialized = false; } /* Called when encountering a target directive. If DEVICE - is -1, it means use device-var ICV. If it is -2 (or any other value - larger than last available hw device, use host fallback. - FN is address of host code, OPENMP_TARGET contains value of the - __OPENMP_TARGET__ symbol in the shared library or binary that invokes + is GOMP_DEVICE_ICV, it means use device-var ICV. 
If it is + GOMP_DEVICE_HOST_FALLBACK (or any value + larger than last available hw device), use host fallback. + FN is address of host code, OFFLOAD_TABLE contains value of the + __OFFLOAD_TABLE__ symbol in the shared library or binary that invokes GOMP_target. HOSTADDRS, SIZES and KINDS are arrays with MAPNUM entries, with addresses of the host objects, sizes of the host objects (resp. for pointer kind pointer bias and assumed sizeof (void *) size) and kinds. */ void -GOMP_target (int device, void (*fn) (void *), const void *openmp_target, +GOMP_target (int device, void (*fn) (void *), const void *offload_table, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) { struct gomp_device_descr *devicep = resolve_device (device); - if (devicep == NULL) + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) { /* Host fallback. */ struct gomp_thread old_thr, *thr = gomp_thread (); @@ -693,20 +768,38 @@ GOMP_target (int device, void (*fn) (void *), const void *openmp_target, return; } - gomp_mutex_lock (&devicep->dev_env_lock); + gomp_mutex_lock (&devicep->lock); if (!devicep->is_initialized) gomp_init_device (devicep); + gomp_mutex_unlock (&devicep->lock); - struct splay_tree_key_s k; - k.host_start = (uintptr_t) fn; - k.host_end = k.host_start + 1; - splay_tree_key tgt_fn = splay_tree_lookup (&devicep->dev_splay_tree, &k); - if (tgt_fn == NULL) - gomp_fatal ("Target function wasn't mapped"); - gomp_mutex_unlock (&devicep->dev_env_lock); + void *fn_addr; + + if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) + fn_addr = (void *) fn; + else + { + struct gomp_memory_mapping *mm = &devicep->mem_map; + gomp_mutex_lock (&mm->lock); + + if (!mm->is_initialized) + gomp_init_tables (devicep, mm); + + struct splay_tree_key_s k; + k.host_start = (uintptr_t) fn; + k.host_end = k.host_start + 1; + splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k); + if (tgt_fn == NULL) + gomp_fatal ("Target function wasn't 
mapped"); + + gomp_mutex_unlock (&mm->lock); + + fn_addr = (void *) tgt_fn->tgt->tgt_start; + } struct target_mem_desc *tgt_vars - = gomp_map_vars (devicep, mapnum, hostaddrs, sizes, kinds, true); + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, + true); struct gomp_thread old_thr, *thr = gomp_thread (); old_thr = *thr; memset (thr, '\0', sizeof (*thr)); @@ -715,19 +808,20 @@ GOMP_target (int device, void (*fn) (void *), const void *openmp_target, thr->place = old_thr.place; thr->ts.place_partition_len = gomp_places_list_len; } - devicep->run_func (devicep->target_id, (void *) tgt_fn->tgt->tgt_start, - (void *) tgt_vars->tgt_start); + devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start); gomp_free_thread (thr); *thr = old_thr; - gomp_unmap_vars (tgt_vars); + gomp_unmap_vars (tgt_vars, true); } void -GOMP_target_data (int device, const void *openmp_target, size_t mapnum, +GOMP_target_data (int device, const void *offload_table, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) { struct gomp_device_descr *devicep = resolve_device (device); - if (devicep == NULL) + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) { /* Host fallback. */ struct gomp_task_icv *icv = gomp_icv (false); @@ -738,20 +832,27 @@ GOMP_target_data (int device, const void *openmp_target, size_t mapnum, new #pragma omp target data, otherwise GOMP_target_end_data would get out of sync. 
*/ struct target_mem_desc *tgt - = gomp_map_vars (NULL, 0, NULL, NULL, NULL, false); + = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false); tgt->prev = icv->target_data; icv->target_data = tgt; } return; } - gomp_mutex_lock (&devicep->dev_env_lock); + gomp_mutex_lock (&devicep->lock); if (!devicep->is_initialized) gomp_init_device (devicep); - gomp_mutex_unlock (&devicep->dev_env_lock); + gomp_mutex_unlock (&devicep->lock); + + struct gomp_memory_mapping *mm = &devicep->mem_map; + gomp_mutex_lock (&mm->lock); + if (!mm->is_initialized) + gomp_init_tables (devicep, mm); + gomp_mutex_unlock (&mm->lock); struct target_mem_desc *tgt - = gomp_map_vars (devicep, mapnum, hostaddrs, sizes, kinds, false); + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, + false); struct gomp_task_icv *icv = gomp_icv (true); tgt->prev = icv->target_data; icv->target_data = tgt; @@ -765,24 +866,32 @@ GOMP_target_end_data (void) { struct target_mem_desc *tgt = icv->target_data; icv->target_data = tgt->prev; - gomp_unmap_vars (tgt); + gomp_unmap_vars (tgt, true); } } void -GOMP_target_update (int device, const void *openmp_target, size_t mapnum, +GOMP_target_update (int device, const void *offload_table, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) { struct gomp_device_descr *devicep = resolve_device (device); - if (devicep == NULL) + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) return; - gomp_mutex_lock (&devicep->dev_env_lock); + gomp_mutex_lock (&devicep->lock); if (!devicep->is_initialized) gomp_init_device (devicep); - gomp_mutex_unlock (&devicep->dev_env_lock); + gomp_mutex_unlock (&devicep->lock); - gomp_update (devicep, mapnum, hostaddrs, sizes, kinds); + struct gomp_memory_mapping *mm = &devicep->mem_map; + gomp_mutex_lock (&mm->lock); + if (!mm->is_initialized) + gomp_init_tables (devicep, mm); + gomp_mutex_unlock (&mm->lock); + + gomp_update (devicep, mm, mapnum, hostaddrs, sizes, 
kinds, false); } void @@ -808,54 +917,137 @@ static bool gomp_load_plugin_for_device (struct gomp_device_descr *device, const char *plugin_name) { + char *err = NULL, *last_missing = NULL; + int optional_present, optional_total; + + /* Clear any existing error. */ + dlerror (); + void *plugin_handle = dlopen (plugin_name, RTLD_LAZY); if (!plugin_handle) - return false; + { + err = dlerror (); + goto out; + } /* Check if all required functions are available in the plugin and store their handlers. */ -#define DLSYM(f) \ - do \ - { \ - device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_"#f); \ - if (!device->f##_func) \ - return false; \ - } \ +#define DLSYM(f) \ + do \ + { \ + device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \ + err = dlerror (); \ + if (err != NULL) \ + goto out; \ + } \ while (0) + /* Similar, but missing functions are not an error. */ +#define DLSYM_OPT(f, n) \ + do \ + { \ + char *tmp_err; \ + device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \ + tmp_err = dlerror (); \ + if (tmp_err == NULL) \ + optional_present++; \ + else \ + last_missing = #n; \ + optional_total++; \ + } \ + while (0) + + DLSYM (get_name); + DLSYM (get_caps); DLSYM (get_type); DLSYM (get_num_devices); DLSYM (register_image); DLSYM (init_device); + DLSYM (fini_device); DLSYM (get_table); DLSYM (alloc); DLSYM (free); DLSYM (dev2host); DLSYM (host2dev); - DLSYM (run); + device->capabilities = device->get_caps_func (); + if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + DLSYM (run); + if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) + { + optional_present = optional_total = 0; + DLSYM_OPT (openacc.exec, openacc_parallel); + DLSYM_OPT (openacc.open_device, openacc_open_device); + DLSYM_OPT (openacc.close_device, openacc_close_device); + DLSYM_OPT (openacc.get_device_num, openacc_get_device_num); + DLSYM_OPT (openacc.set_device_num, openacc_set_device_num); + DLSYM_OPT (openacc.register_async_cleanup, + openacc_register_async_cleanup); + 
DLSYM_OPT (openacc.async_test, openacc_async_test); + DLSYM_OPT (openacc.async_test_all, openacc_async_test_all); + DLSYM_OPT (openacc.async_wait, openacc_async_wait); + DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async); + DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all); + DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async); + DLSYM_OPT (openacc.async_set_async, openacc_async_set_async); + DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data); + DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data); + /* Require all the OpenACC handlers if we have + GOMP_OFFLOAD_CAP_OPENACC_200. */ + if (optional_present != optional_total) + { + err = "plugin missing OpenACC handler function"; + goto out; + } + optional_present = optional_total = 0; + DLSYM_OPT (openacc.cuda.get_current_device, + openacc_get_current_cuda_device); + DLSYM_OPT (openacc.cuda.get_current_context, + openacc_get_current_cuda_context); + DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream); + DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream); + /* Make sure all the CUDA functions are there if any of them are. */ + if (optional_present && optional_present != optional_total) + { + err = "plugin missing OpenACC CUDA handler function"; + goto out; + } + } #undef DLSYM +#undef DLSYM_OPT - return true; + out: + if (err != NULL) + { + gomp_error ("while loading %s: %s", plugin_name, err); + if (last_missing) + gomp_error ("missing function was %s", last_missing); + if (plugin_handle) + dlclose (plugin_handle); + } + return err == NULL; } -/* This function finds OFFLOAD_IMAGES corresponding to DEVICE type, and - registers them in the plugin. */ +/* This function adds a compatible offload image IMAGE to an accelerator device + DEVICE. DEVICE must be locked on entry, and remains locked on return. 
*/ static void -gomp_register_images_for_device (struct gomp_device_descr *device) +gomp_register_image_for_device (struct gomp_device_descr *device, + struct offload_image_descr *image) { - int i; - for (i = 0; i < num_offload_images; i++) + if (!device->offload_regions_registered + && (device->type == image->type + || device->type == OFFLOAD_TARGET_TYPE_HOST)) { - struct offload_image_descr *image = &offload_images[i]; - if (image->type == device->type) - device->register_image_func (image->host_table, image->target_data); + device->register_image_func (image->host_table, image->target_data); + device->offload_regions_registered = true; } } /* This function initializes the runtime needed for offloading. - It parses the list of offload targets and tries to load the plugins for these - targets. Result of the function is properly initialized variable NUM_DEVICES - and array DEVICES, containing descriptors for corresponding devices. */ + It parses the list of offload targets and tries to load the plugins for + these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP + will be set, and the array DEVICES initialized, containing descriptors for + corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed + by the others. */ static void gomp_target_init (void) @@ -894,6 +1086,8 @@ gomp_target_init (void) new_num_devices = current_device.get_num_devices_func (); if (new_num_devices >= 1) { + /* Augment DEVICES and NUM_DEVICES. */ + devices = realloc (devices, (num_devices + new_num_devices) * sizeof (struct gomp_device_descr)); if (!devices) @@ -903,16 +1097,21 @@ gomp_target_init (void) break; } + current_device.name = current_device.get_name_func (); + /* current_device.capabilities has already been set. 
*/ current_device.type = current_device.get_type_func (); + current_device.mem_map.is_initialized = false; + current_device.mem_map.splay_tree.root = NULL; current_device.is_initialized = false; - current_device.dev_splay_tree.root = NULL; - gomp_register_images_for_device (&current_device); + current_device.offload_regions_registered = false; + current_device.openacc.data_environ = NULL; + current_device.openacc.target_data = NULL; for (i = 0; i < new_num_devices; i++) { - current_device.id = num_devices + 1; current_device.target_id = i; devices[num_devices] = current_device; - gomp_mutex_init (&devices[num_devices].dev_env_lock); + gomp_mutex_init (&devices[num_devices].mem_map.lock); + gomp_mutex_init (&devices[num_devices].lock); num_devices++; } } @@ -923,6 +1122,41 @@ gomp_target_init (void) } while (next); + /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set + NUM_DEVICES_OPENMP. */ + struct gomp_device_descr *devices_s + = malloc (num_devices * sizeof (struct gomp_device_descr)); + if (!devices_s) + { + num_devices = 0; + free (devices); + devices = NULL; + } + num_devices_openmp = 0; + for (i = 0; i < num_devices; i++) + if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + devices_s[num_devices_openmp++] = devices[i]; + int num_devices_after_openmp = num_devices_openmp; + for (i = 0; i < num_devices; i++) + if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + devices_s[num_devices_after_openmp++] = devices[i]; + free (devices); + devices = devices_s; + + for (i = 0; i < num_devices; i++) + { + int j; + + for (j = 0; j < num_offload_images; j++) + gomp_register_image_for_device (&devices[i], &offload_images[j]); + + /* The 'devices' array can be moved (by the realloc call) until we have + found all the plugins, so registering with the OpenACC runtime (which + takes a copy of the pointer argument) must be delayed until now. 
*/ + if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) + goacc_register (&devices[i]); + } + free (offload_images); offload_images = NULL; num_offload_images = 0; diff --git a/libgomp/testsuite/Makefile.am b/libgomp/testsuite/Makefile.am index 9cc103a1c4d..66a9d947e04 100644 --- a/libgomp/testsuite/Makefile.am +++ b/libgomp/testsuite/Makefile.am @@ -12,7 +12,16 @@ _RUNTEST = $(shell if test -f $(top_srcdir)/../dejagnu/runtest; then \ echo $(top_srcdir)/../dejagnu/runtest; else echo runtest; fi) RUNTEST = "$(_RUNTEST) $(AM_RUNTESTFLAGS)" -# Used for support non-fallback offloading. -export OFFLOAD_TARGETS = $(offload_targets) -export OFFLOAD_ADDITIONAL_OPTIONS = $(offload_additional_options) -export OFFLOAD_ADDITIONAL_LIB_PATHS = $(offload_additional_lib_paths) + +# Instead of directly in ../testsuite/libgomp-test-support.exp.in, the +# following variables have to be "routed through" this Makefile, for expansion +# of the several (Makefile) variables used therein. +libgomp-test-support.exp: libgomp-test-support.pt.exp Makefile + cp $< $@.tmp + echo >> $@.tmp \ + 'set offload_additional_options "$(offload_additional_options)"' + echo >> $@.tmp \ + 'set offload_additional_lib_paths "$(offload_additional_lib_paths)"' + mv $@.tmp $@ + +all-local: libgomp-test-support.exp diff --git a/libgomp/testsuite/Makefile.in b/libgomp/testsuite/Makefile.in index 2f845f0c7cb..352fc3fec7c 100644 --- a/libgomp/testsuite/Makefile.in +++ b/libgomp/testsuite/Makefile.in @@ -35,7 +35,8 @@ build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ subdir = testsuite -DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(srcdir)/libgomp-test-support.exp.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ $(top_srcdir)/../config/depstand.m4 \ @@ -49,12 +50,13 @@ am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ $(top_srcdir)/../config/tls.m4 
$(top_srcdir)/../ltoptions.m4 \ $(top_srcdir)/../ltsugar.m4 $(top_srcdir)/../ltversion.m4 \ $(top_srcdir)/../lt~obsolete.m4 $(top_srcdir)/acinclude.m4 \ - $(top_srcdir)/../libtool.m4 $(top_srcdir)/configure.ac + $(top_srcdir)/../libtool.m4 $(top_srcdir)/plugin/configfrag.ac \ + $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs CONFIG_HEADER = $(top_builddir)/config.h -CONFIG_CLEAN_FILES = +CONFIG_CLEAN_FILES = libgomp-test-support.pt.exp CONFIG_CLEAN_VPATH_FILES = SOURCES = DEJATOOL = $(PACKAGE) @@ -71,6 +73,8 @@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ +CUDA_DRIVER_INCLUDE = @CUDA_DRIVER_INCLUDE@ +CUDA_DRIVER_LIB = @CUDA_DRIVER_LIB@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ @@ -129,6 +133,10 @@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PERL = @PERL@ +PLUGIN_NVPTX = @PLUGIN_NVPTX@ +PLUGIN_NVPTX_CPPFLAGS = @PLUGIN_NVPTX_CPPFLAGS@ +PLUGIN_NVPTX_LDFLAGS = @PLUGIN_NVPTX_LDFLAGS@ +PLUGIN_NVPTX_LIBS = @PLUGIN_NVPTX_LIBS@ RANLIB = @RANLIB@ SECTION_LDFLAGS = @SECTION_LDFLAGS@ SED = @SED@ @@ -250,6 +258,8 @@ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): +libgomp-test-support.pt.exp: $(top_builddir)/config.status $(srcdir)/libgomp-test-support.exp.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo @@ -303,7 +313,7 @@ distclean-DEJAGNU: check-am: all-am $(MAKE) $(AM_MAKEFLAGS) check-DEJAGNU check: check-am -all-am: Makefile +all-am: Makefile all-local installdirs: install: install-am install-exec: install-exec-am @@ -398,23 +408,31 @@ uninstall-am: .MAKE: check-am install-am install-strip -.PHONY: all all-am check check-DEJAGNU check-am clean 
clean-generic \ - clean-libtool distclean distclean-DEJAGNU distclean-generic \ - distclean-libtool dvi dvi-am html html-am info info-am install \ - install-am install-data install-data-am install-dvi \ - install-dvi-am install-exec install-exec-am install-html \ - install-html-am install-info install-info-am install-man \ - install-pdf install-pdf-am install-ps install-ps-am \ - install-strip installcheck installcheck-am installdirs \ - maintainer-clean maintainer-clean-generic mostlyclean \ - mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ - uninstall uninstall-am +.PHONY: all all-am all-local check check-DEJAGNU check-am clean \ + clean-generic clean-libtool distclean distclean-DEJAGNU \ + distclean-generic distclean-libtool dvi dvi-am html html-am \ + info info-am install install-am install-data install-data-am \ + install-dvi install-dvi-am install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am uninstall uninstall-am -# Used for support non-fallback offloading. -export OFFLOAD_TARGETS = $(offload_targets) -export OFFLOAD_ADDITIONAL_OPTIONS = $(offload_additional_options) -export OFFLOAD_ADDITIONAL_LIB_PATHS = $(offload_additional_lib_paths) +# Instead of directly in ../testsuite/libgomp-test-support.exp.in, the +# following variables have to be "routed through" this Makefile, for expansion +# of the several (Makefile) variables used therein. 
+libgomp-test-support.exp: libgomp-test-support.pt.exp Makefile + cp $< $@.tmp + echo >> $@.tmp \ + 'set offload_additional_options "$(offload_additional_options)"' + echo >> $@.tmp \ + 'set offload_additional_lib_paths "$(offload_additional_lib_paths)"' + mv $@.tmp $@ + +all-local: libgomp-test-support.exp # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp index 2d6f822adf9..5a6eec12352 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -32,6 +32,29 @@ load_gcc_lib timeout-dg.exp load_gcc_lib torture-options.exp load_gcc_lib fortran-modules.exp +# Try to load a test support file, built during libgomp configuration. +load_file libgomp-test-support.exp + +# Populate offload_targets_s (offloading targets separated by a space), and +# offload_targets_s_openacc (the same, but with OpenACC names; OpenACC spells +# some of them a little differently). +set offload_targets_s [split $offload_targets ","] +set offload_targets_s_openacc {} +foreach offload_target_openacc $offload_targets_s { + switch $offload_target_openacc { + intelmic { + # Skip; will all FAIL because of missing + # GOMP_OFFLOAD_CAP_OPENACC_200. 
+ continue + } + nvptx { + set offload_target_openacc "nvidia" + } + } + lappend offload_targets_s_openacc "$offload_target_openacc" +} +lappend offload_targets_s_openacc "host" + set dg-do-what-default run # @@ -108,13 +131,9 @@ proc libgomp_init { args } { # Compute what needs to be put into LD_LIBRARY_PATH set always_ld_library_path ".:${blddir}/.libs" - # Get offload-related variables from environment (exported by Makefile) - set offload_targets [getenv OFFLOAD_TARGETS] - set offload_additional_options [getenv OFFLOAD_ADDITIONAL_OPTIONS] - set offload_additional_lib_paths [getenv OFFLOAD_ADDITIONAL_LIB_PATHS] - # Add liboffloadmic build directory in LD_LIBRARY_PATH to support # non-fallback testing for Intel MIC targets + global offload_targets if { [string match "*,intelmic,*" ",$offload_targets,"] } { append always_ld_library_path ":${blddir}/../liboffloadmic/.libs" append always_ld_library_path ":${blddir}/../liboffloadmic/plugin/.libs" @@ -122,6 +141,7 @@ proc libgomp_init { args } { append always_ld_library_path ":${blddir}/../libstdc++-v3/src/.libs" } + global offload_additional_lib_paths if { $offload_additional_lib_paths != "" } { append always_ld_library_path "${offload_additional_lib_paths}" } @@ -158,9 +178,29 @@ proc libgomp_init { args } { lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/.libs" lappend ALWAYS_CFLAGS "additional_flags=-I${blddir}" lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/.libs" + # The top-level include directory, for gomp-constants.h. + lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/../../include" } lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/.." + # For build-tree testing, also consider the CUDA paths used for building. + # For installed testing, we assume all that to be provided in the sysroot. 
+ if { $blddir != "" } { + global cuda_driver_include + global cuda_driver_lib + if { $cuda_driver_include != "" } { + # Stop gfortran from freaking out: + # Warning: Nonexistent include directory "[...]" + if {[file exists $cuda_driver_include]} { + lappend ALWAYS_CFLAGS "additional_flags=-I$cuda_driver_include" + } + } + if { $cuda_driver_lib != "" } { + lappend ALWAYS_CFLAGS "additional_flags=-L$cuda_driver_lib" + append always_ld_library_path ":$cuda_driver_lib" + } + } + # We use atomic operations in the testcases to validate results. if { ([istarget i?86-*-*] || [istarget x86_64-*-*]) && [check_effective_target_ia32] } { @@ -191,6 +231,7 @@ proc libgomp_init { args } { # Used for support non-fallback offloading. # Help GCC to find target mkoffload. + global offload_additional_options if { $offload_additional_options != "" } { lappend ALWAYS_CFLAGS "additional_flags=${offload_additional_options}" } @@ -278,3 +319,29 @@ proc check_effective_target_offload_device { } { } } ] } + +# Return 1 if at least one nvidia board is present. + +proc check_effective_target_openacc_nvidia_accel_present { } { + return [check_runtime openacc_nvidia_accel_present { + #include + int main () { + return !(acc_get_num_devices (acc_device_nvidia) > 0); + } + } "" ] +} + +# Return 1 if at least one nvidia board is present, and the nvidia device type +# is selected by default by means of setting the environment variable +# ACC_DEVICE_TYPE. 
+ +proc check_effective_target_openacc_nvidia_accel_selected { } { + if { ![check_effective_target_openacc_nvidia_accel_present] } { + return 0; + } + global offload_target_openacc + if { $offload_target_openacc == "nvidia" } { + return 1; + } + return 0; +} diff --git a/libgomp/testsuite/libgomp-test-support.exp.in b/libgomp/testsuite/libgomp-test-support.exp.in new file mode 100644 index 00000000000..764bec06c29 --- /dev/null +++ b/libgomp/testsuite/libgomp-test-support.exp.in @@ -0,0 +1,4 @@ +set cuda_driver_include "@CUDA_DRIVER_INCLUDE@" +set cuda_driver_lib "@CUDA_DRIVER_LIB@" + +set offload_targets "@offload_targets@" diff --git a/libgomp/testsuite/libgomp.oacc-c++/c++.exp b/libgomp/testsuite/libgomp.oacc-c++/c++.exp new file mode 100644 index 00000000000..f486f9b97ba --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/c++.exp @@ -0,0 +1,107 @@ +# This whole file adapted from libgomp.c++/c++.exp. + +load_lib libgomp-dg.exp +load_gcc_lib gcc-dg.exp + +global shlib_ext + +set shlib_ext [get_shlib_extension] +set lang_link_flags "-lstdc++" +set lang_test_file_found 0 +set lang_library_path "../libstdc++-v3/src/.libs" +if [info exists lang_include_flags] then { + unset lang_include_flags +} + +# Initialize dg. +dg-init + +# Turn on OpenACC. +lappend ALWAYS_CFLAGS "additional_flags=-fopenacc" + +# Switch into C++ mode. Otherwise, the libgomp.oacc-c-c++-common/*.c +# files would be compiled as C files. +set SAVE_GCC_UNDER_TEST "$GCC_UNDER_TEST" +set GCC_UNDER_TEST "$GCC_UNDER_TEST -x c++" + +set blddir [lookfor_file [get_multilibs] libgomp] + + +if { $blddir != "" } { + # Look for a static libstdc++ first. + if [file exists "${blddir}/${lang_library_path}/libstdc++.a"] { + set lang_test_file "${lang_library_path}/libstdc++.a" + set lang_test_file_found 1 + # We may have a shared only build, so look for a shared libstdc++. 
+ } elseif [file exists "${blddir}/${lang_library_path}/libstdc++.${shlib_ext}"] { + set lang_test_file "${lang_library_path}/libstdc++.${shlib_ext}" + set lang_test_file_found 1 + } else { + puts "No libstdc++ library found, will not execute c++ tests" + } +} elseif { [info exists GXX_UNDER_TEST] } { + set lang_test_file_found 1 + # Needs to exist for libgomp.exp. + set lang_test_file "" +} else { + puts "GXX_UNDER_TEST not defined, will not execute c++ tests" +} + +if { $lang_test_file_found } { + # Gather a list of all tests. + set tests [lsort [concat \ + [find $srcdir/$subdir *.C] \ + [find $srcdir/$subdir/../libgomp.oacc-c-c++-common *.c]]] + + if { $blddir != "" } { + set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}" + } else { + set ld_library_path "$always_ld_library_path" + } + append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST] + set_ld_library_path_env_vars + + set flags_file "${blddir}/../libstdc++-v3/scripts/testsuite_flags" + if { [file exists $flags_file] } { + set libstdcxx_includes [exec sh $flags_file --build-includes] + } else { + set libstdcxx_includes "" + } + + # Test OpenACC with available accelerators. + foreach offload_target_openacc $offload_targets_s_openacc { + set tagopt "-DACC_DEVICE_TYPE_$offload_target_openacc=1" + + switch $offload_target_openacc { + host { + set acc_mem_shared 1 + } + host_nonshm { + set acc_mem_shared 0 + } + nvidia { + # Copy ptx file (TEMPORARY) + remote_download host $srcdir/libgomp.oacc-c-c++-common/subr.ptx + + # Where timer.h lives + lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/libgomp.oacc-c-c++-common" + + set acc_mem_shared 0 + } + default { + set acc_mem_shared 0 + } + } + set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared" + + setenv ACC_DEVICE_TYPE $offload_target_openacc + + dg-runtest $tests "$tagopt" $libstdcxx_includes + } +} + +# See above. +set GCC_UNDER_TEST "$SAVE_GCC_UNDER_TEST" + +# All done. 
+dg-finish diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-1.c new file mode 100644 index 00000000000..f88b9e3c7f5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-1.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-shouldfail "" { *-*-* } { "*" } { "" } } */ + +#include + +int +main (void) +{ + +#pragma acc parallel + { + abort (); + } + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-2.c new file mode 100644 index 00000000000..debb81e78f8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-2.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ + +#include + +int +main (int argc, char **argv) +{ + +#pragma acc parallel + { + if (argc != 1) + abort (); + } + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-3.c new file mode 100644 index 00000000000..be7aaa86551 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-3.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-shouldfail "" { *-*-* } { "*" } { "" } } */ + +#include + +int +main (void) +{ + +#pragma acc kernels + { + abort (); + } + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-4.c new file mode 100644 index 00000000000..c29ca3fb118 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-4.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ + +#include + +int +main (int argc, char **argv) +{ + +#pragma acc kernels + { + if (argc != 1) + abort (); + } + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c new file mode 100644 index 00000000000..81ea47689a9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c @@ 
-0,0 +1,75 @@ +/* Disable the acc_on_device builtin; we want to test the libgomp library + function. */ +/* { dg-additional-options "-fno-builtin-acc_on_device" } */ + +#include +#include + +int +main (int argc, char *argv[]) +{ + /* Host. */ + + { + if (!acc_on_device (acc_device_none)) + abort (); + if (!acc_on_device (acc_device_host)) + abort (); + if (acc_on_device (acc_device_host_nonshm)) + abort (); + if (acc_on_device (acc_device_not_host)) + abort (); + if (acc_on_device (acc_device_nvidia)) + abort (); + } + + + /* Host via offloading fallback mode. */ + +#pragma acc parallel if(0) + { + if (!acc_on_device (acc_device_none)) + abort (); + if (!acc_on_device (acc_device_host)) + abort (); + if (acc_on_device (acc_device_host_nonshm)) + abort (); + if (acc_on_device (acc_device_not_host)) + abort (); + if (acc_on_device (acc_device_nvidia)) + abort (); + } + + +#if !ACC_DEVICE_TYPE_host + + /* Offloaded. */ + +#pragma acc parallel + { + if (acc_on_device (acc_device_none)) + abort (); + if (acc_on_device (acc_device_host)) + abort (); +#if ACC_DEVICE_TYPE_host_nonshm + if (!acc_on_device (acc_device_host_nonshm)) + abort (); +#else + if (acc_on_device (acc_device_host_nonshm)) + abort (); +#endif + if (!acc_on_device (acc_device_not_host)) + abort (); +#if ACC_DEVICE_TYPE_nvidia + if (!acc_on_device (acc_device_nvidia)) + abort (); +#else + if (acc_on_device (acc_device_nvidia)) + abort (); +#endif + } + +#endif + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c new file mode 100644 index 00000000000..22cef6db29f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c @@ -0,0 +1,466 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include "cuda.h" + +#include +#include + +int +main (int argc, char **argv) +{ + CUresult r; + CUstream stream1; + int N = 128; 
//1024 * 1024; + float *a, *b, *c, *d, *e; + int i; + int nbytes; + + acc_init (acc_device_nvidia); + + nbytes = N * sizeof (float); + + a = (float *) malloc (nbytes); + b = (float *) malloc (nbytes); + c = (float *) malloc (nbytes); + d = (float *) malloc (nbytes); + e = (float *) malloc (nbytes); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + +#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N) + { + +#pragma acc parallel async + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc wait + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 0.0; + } + +#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort (); + + if (b[i] != 2.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + } + +#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 9.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + e[i] = 0.0; + } + +#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N) + { + +#pragma acc parallel async 
(1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; + } + +#pragma acc parallel wait (1) async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + e[ii] = a[ii] + b[ii] + c[ii] + d[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort (); + + if (b[i] != 4.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + abort (); + + if (e[i] != 11.0) + abort (); + } + + + r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + acc_set_cuda_stream (1, stream1); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 0.0; + } + +#pragma acc data copy (a[0:N], b[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 7.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + } + +#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 7.0) + abort (); + + if (b[i] != 49.0) + abort (); + + if (c[i] != 
4.0) + abort (); + + if (d[i] != 1.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + e[i] = 0.0; + } + +#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; + } + +#pragma acc parallel wait (1) async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + e[ii] = a[ii] + b[ii] + c[ii] + d[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 9.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + abort (); + + if (e[i] != 17.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + e[i] = 0.0; + } + +#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 16.0) + abort (); + + if (c[i] != 4.0) + abort (); + } + + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + e[i] = 0.0; + } + +#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N) + { + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc parallel async (1) + { + int ii; + + for (ii = 0; 
ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1) + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 25.0) + abort (); + + if (c[i] != 4.0) + abort (); + } + + acc_shutdown (acc_device_nvidia); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/cache-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/cache-1.c new file mode 100644 index 00000000000..3f1f0bb3764 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/cache-1.c @@ -0,0 +1,48 @@ +int +main (int argc, char **argv) +{ +#define N 2 + int a[N], b[N]; + int i; + + for (i = 0; i < N; i++) + { + a[i] = 3; + b[i] = 0; + } + +#pragma acc parallel copyin (a[0:N]) copyout (b[0:N]) +{ + int ii; + + for (ii = 0; ii < N; ii++) + { + const int idx = ii; + int n = 1; + const int len = n; + +#pragma acc cache (a[0:N]) + +#pragma acc cache (a[0:N], b[0:N]) + +#pragma acc cache (a[0]) + +#pragma acc cache (a[0], a[1], b[0:N]) + +#pragma acc cache (a[idx]) + +#pragma acc cache (a[idx:len]) + + b[ii] = a[ii]; + } +} + + + for (i = 0; i < N; i++) + { + if (a[i] != b[i]) + __builtin_abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c new file mode 100644 index 00000000000..51c0cf58a90 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c @@ -0,0 +1,623 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + int N = 8; + float *a, *b, *c, *d; + int i; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + c = (float *) malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + +#pragma acc parallel copyin (a[0:N]) copyout (b[0:N]) + 
{ + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 3.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 1.0; + } + +#pragma acc parallel copyin (a[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 5.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + + d = (float *) acc_copyin (&a[0], N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc parallel present_or_copyin (a[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc parallel copyin (a[0:N]) present_or_copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 2.0; + } + + d = (float *) acc_copyin (&b[0], N * sizeof (float)); + +#pragma acc parallel copyin (a[0:N]) present_or_copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 2.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if 
(!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + acc_free (d); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 4.0; + } + +#pragma acc parallel copy (a[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = a[ii] + 2; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 6.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 7.0; + } + +#pragma acc parallel present_or_copy (a[0:N]) present_or_copy (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = b[ii] + 2; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 9.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 7.0; + } + + d = (float *) acc_copyin (&a[0], N * sizeof (float)); + d = (float *) acc_copyin (&b[0], N * sizeof (float)); + +#pragma acc parallel present_or_copy (a[0:N]) present_or_copy (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = b[ii] + 2; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 7.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + d = (float *) acc_deviceptr (&a[0]); + acc_unmap_data (&a[0]); + acc_free (d); + + d = (float *) acc_deviceptr (&b[0]); + acc_unmap_data (&b[0]); + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 7.0; + } + +#pragma acc parallel copyin (a[0:N]) create (c[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { 
+ c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + +#pragma acc parallel copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 5.0; + } + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc parallel copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort (); + + if (b[i] != 2.0) + abort (); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (acc_is_present (b, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + d = (float *) acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc parallel copyin (a[0:N]) present (c[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } 
+ + if (acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (acc_is_present (b, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + + acc_copyin (a, N * sizeof (float)); + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (b, d, N * sizeof (float)); + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc parallel present (a[0:N]) present (c[0:N]) present (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + if (!acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (b, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + acc_copyout (b, N * sizeof (float)); + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } + + d = (float *) acc_deviceptr (a); + + acc_unmap_data (a); + + acc_free (d); + + d = (float *) acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 6.0; + } + + d = (float *) acc_malloc (N * sizeof (float)); + +#pragma acc parallel copyin (a[0:N]) deviceptr (d) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + d[ii] = a[ii]; + b[ii] = d[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (acc_is_present (b, (N * sizeof (float)))) + abort (); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + + d = (float *) acc_copyin (&a[0], N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc parallel pcopyin (a[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for 
(i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc parallel copyin (a[0:N]) pcopyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 7.0; + } + +#pragma acc parallel copyin (a[0:N]) pcreate (c[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-2.c new file mode 100644 index 00000000000..8dc45cb6287 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-2.c @@ -0,0 +1,67 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + int N = 8; + float *a, *b, *c, *d; + int i; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + c = (float *) malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 5.0; + } + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc parallel copyin (a[0:N]) present_or_create 
(c[0:N+1]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort (); + + if (b[i] != 2.0) + abort (); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (acc_is_present (b, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + d = (float *) acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + return 0; +} +/* { dg-shouldfail "libgomp: \[\h+,\d+\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-1.c new file mode 100644 index 00000000000..80fed6ca550 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-1.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ + +#include +#include + +int +main (void) +{ + int i, j, k, l = 0; + int a[3][3][3]; + + memset (a, '\0', sizeof (a)); + #pragma acc parallel + #pragma acc loop collapse(4 - 1) + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + a[i][j][k] = i + j * 4 + k * 16; + #pragma acc parallel + { + #pragma acc loop collapse(2) reduction(|:l) + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + if (a[i][j][k] != i + j * 4 + k * 16) + l = 1; + } + if (l) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c new file mode 100644 index 00000000000..44a77f7665d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ + +#include + +int +main (void) +{ + int i, j, k, l = 0, f = 0, x = 0; + int m1 = 4, m2 = -5, m3 = 17; + + #pragma acc parallel + #pragma acc loop collapse(3) reduction(+:l) + for (i = -2; i < m1; i++) + for (j = m2; j < -2; j++) + { + for (k = 13; k < m3; k++) + { + if ((i + 2) * 12 + (j + 5) * 4 + (k - 13) != 
9 + f++) + l++; + } + } + + for (i = -2; i < m1; i++) + for (j = m2; j < -2; j++) + { + for (k = 13; k < m3; k++) + { + if ((i + 2) * 12 + (j + 5) * 4 + (k - 13) != 9 + f++) + x++; + } + } + + if (l != x) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c new file mode 100644 index 00000000000..a5be7287d22 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +#include +#include +#include + +int +main (void) +{ + int i2, l = 0, r = 0; + int a[3][3][3]; + + memset (a, '\0', sizeof (a)); + #pragma acc parallel + #pragma acc loop collapse(4 - 1) + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + a[i][j][k] = i + j * 4 + k * 16; +#pragma acc parallel + { + #pragma acc loop collapse(2) reduction(|:l) + for (i2 = 0; i2 < 2; i2++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + if (a[i2][j][k] != i2 + j * 4 + k * 16) + l += 1; + } + + for (i2 = 0; i2 < 2; i2++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + if (a[i2][j][k] != i2 + j * 4 + k * 16) + r += 1; + + if (l != r) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-4.c new file mode 100644 index 00000000000..52dd4353246 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-4.c @@ -0,0 +1,27 @@ +/* { dg-do run } */ + +#include + +int +main (void) +{ + int l = 0; + int b[3][3]; + int i, j; + + memset (b, '\0', sizeof (b)); + +#pragma acc parallel copy(b[0:3][0:3]) copy(l) + { +#pragma acc loop collapse(2) reduction(+:l) + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + if (b[i][j] != 16) + l += 1; + } + + if (l != 2 * 2) + __builtin_abort(); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-1.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-1.c new file mode 100644 index 00000000000..dabc7063c68 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-1.c @@ -0,0 +1,213 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include +#include +#include +#include +#include +#include + +void +saxpy (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + { + y[i] = a * x[i] + y[i]; + } +} + +void +context_check (CUcontext ctx1) +{ + CUcontext ctx2, ctx3; + CUresult r; + + r = cuCtxGetCurrent (&ctx2); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (ctx1 != ctx2) + { + fprintf (stderr, "new context established\n"); + exit (EXIT_FAILURE); + } + + ctx3 = (CUcontext) acc_get_current_cuda_context (); + + if (ctx1 != ctx3) + { + fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n"); + exit (EXIT_FAILURE); + } + + return; +} + +int +main (int argc, char **argv) +{ + cublasStatus_t s; + cudaError_t e; + cublasHandle_t h; + CUcontext pctx, ctx; + CUresult r; + int dev; + int i; + const int N = 256; + float *h_X, *h_Y1, *h_Y2; + float *d_X,*d_Y; + float alpha = 2.0f; + float error_norm; + float ref_norm; + + /* Test 1 - cuBLAS creates, OpenACC shares. 
*/ + + s = cublasCreate (&h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasCreate failed: %d\n", s); + exit (EXIT_FAILURE); + } + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + e = cudaGetDevice (&dev); + if (e != cudaSuccess) + { + fprintf (stderr, "cudaGetDevice failed: %d\n", e); + exit (EXIT_FAILURE); + } + + acc_set_device_num (dev, acc_device_nvidia); + + h_X = (float *) malloc (N * sizeof (float)); + if (!h_X) + { + fprintf (stderr, "malloc failed: for h_X\n"); + exit (EXIT_FAILURE); + } + + h_Y1 = (float *) malloc (N * sizeof (float)); + if (!h_Y1) + { + fprintf (stderr, "malloc failed: for h_Y1\n"); + exit (EXIT_FAILURE); + } + + h_Y2 = (float *) malloc (N * sizeof (float)); + if (!h_Y2) + { + fprintf (stderr, "malloc failed: for h_Y2\n"); + exit (EXIT_FAILURE); + } + + for (i = 0; i < N; i++) + { + h_X[i] = rand () / (float) RAND_MAX; + h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX; + } + + d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float)); + if (d_X == NULL) + { + fprintf (stderr, "copyin error h_X\n"); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float)); + if (d_Y == NULL) + { + fprintf (stderr, "copyin error h_Y1\n"); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasSaxpy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float)); + + context_check (pctx); + + saxpy (N, alpha, h_X, h_Y2); + + error_norm = 0; + ref_norm = 0; + + for (i = 0; i < N; ++i) + { + float diff; + + diff = h_Y1[i] - h_Y2[i]; + error_norm += diff * diff; + ref_norm += h_Y2[i] * h_Y2[i]; + } + + error_norm = (float) sqrt ((double) error_norm); + ref_norm = (float) sqrt ((double) ref_norm); + + if ((fabs 
(ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f)) + { + fprintf (stderr, "math error\n"); + exit (EXIT_FAILURE); + } + + free (h_X); + free (h_Y1); + free (h_Y2); + + acc_free (d_X); + acc_free (d_Y); + + context_check (pctx); + + s = cublasDestroy (h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasDestroy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + acc_shutdown (acc_device_nvidia); + + r = cuCtxGetCurrent (&ctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (!ctx) + { + fprintf (stderr, "Expected context\n"); + exit (EXIT_FAILURE); + } + + if (pctx != ctx) + { + fprintf (stderr, "Unexpected new context\n"); + exit (EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c new file mode 100644 index 00000000000..6a52f746dcb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c @@ -0,0 +1,223 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include +#include +#include +#include +#include +#include + +void +saxpy (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + { + y[i] = a * x[i] + y[i]; + } +} + +void +context_check (CUcontext ctx1) +{ + CUcontext ctx2, ctx3; + CUresult r; + + r = cuCtxGetCurrent (&ctx2); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (ctx1 != ctx2) + { + fprintf (stderr, "new context established\n"); + exit (EXIT_FAILURE); + } + + ctx3 = (CUcontext) acc_get_current_cuda_context (); + + if (ctx1 != ctx3) + { + fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n"); + exit (EXIT_FAILURE); + } + + return; +} + +int +main (int argc, char **argv) +{ + cublasStatus_t s; + cudaError_t e; + cublasHandle_t h; + CUcontext 
pctx, ctx; + CUresult r; + int dev; + int i; + const int N = 256; + float *h_X, *h_Y1, *h_Y2; + float *d_X,*d_Y; + float alpha = 2.0f; + float error_norm; + float ref_norm; + + /* Test 2 - cuBLAS creates, OpenACC shares. */ + + s = cublasCreate (&h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasCreate failed: %d\n", s); + exit (EXIT_FAILURE); + } + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + e = cudaGetDevice (&dev); + if (e != cudaSuccess) + { + fprintf (stderr, "cudaGetDevice failed: %d\n", e); + exit (EXIT_FAILURE); + } + + acc_set_device_num (dev, acc_device_nvidia); + + h_X = (float *) malloc (N * sizeof (float)); + if (h_X == 0) + { + fprintf (stderr, "malloc failed: for h_X\n"); + exit (EXIT_FAILURE); + } + + h_Y1 = (float *) malloc (N * sizeof (float)); + if (h_Y1 == 0) + { + fprintf (stderr, "malloc failed: for h_Y1\n"); + exit (EXIT_FAILURE); + } + + h_Y2 = (float *) malloc (N * sizeof (float)); + if (h_Y2 == 0) + { + fprintf (stderr, "malloc failed: for h_Y2\n"); + exit (EXIT_FAILURE); + } + + for (i = 0; i < N; i++) + { + h_X[i] = rand () / (float) RAND_MAX; + h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX; + } + + d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float)); + if (d_X == NULL) + { + fprintf (stderr, "copyin error h_X\n"); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float)); + if (d_Y == NULL) + { + fprintf (stderr, "copyin error h_Y1\n"); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasSaxpy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float)); + + context_check (pctx); + +#pragma acc parallel present (h_X[0:N]), copy (h_Y2[0:N]) copyin (alpha) + { + int i; + 
+ for (i = 0; i < N; i++) + { + h_Y2[i] = alpha * h_X[i] + h_Y2[i]; + } + } + + context_check (pctx); + + error_norm = 0; + ref_norm = 0; + + for (i = 0; i < N; ++i) + { + float diff; + + diff = h_Y1[i] - h_Y2[i]; + error_norm += diff * diff; + ref_norm += h_Y2[i] * h_Y2[i]; + } + + error_norm = (float) sqrt ((double) error_norm); + ref_norm = (float) sqrt ((double) ref_norm); + + if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f)) + { + fprintf (stderr, "math error\n"); + exit (EXIT_FAILURE); + } + + free (h_X); + free (h_Y1); + free (h_Y2); + + acc_free (d_X); + acc_free (d_Y); + + context_check (pctx); + + s = cublasDestroy (h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasDestroy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + acc_shutdown (acc_device_nvidia); + + r = cuCtxGetCurrent (&ctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (!ctx) + { + fprintf (stderr, "Expected context\n"); + exit (EXIT_FAILURE); + } + + if (pctx != ctx) + { + fprintf (stderr, "Unexpected new context\n"); + exit (EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-3.c new file mode 100644 index 00000000000..ccd276cd98f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-3.c @@ -0,0 +1,200 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include +#include +#include +#include +#include +#include + +void +saxpy (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + { + y[i] = a * x[i] + y[i]; + } +} + +void +context_check (CUcontext ctx1) +{ + CUcontext ctx2, ctx3; + CUresult r; + + r = cuCtxGetCurrent (&ctx2); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (ctx1 != 
ctx2) + { + fprintf (stderr, "new context established\n"); + exit (EXIT_FAILURE); + } + + ctx3 = (CUcontext) acc_get_current_cuda_context (); + + if (ctx1 != ctx3) + { + fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n"); + exit (EXIT_FAILURE); + } + + return; +} + +int +main (int argc, char **argv) +{ + cublasStatus_t s; + cublasHandle_t h; + CUcontext pctx; + CUresult r; + int i; + const int N = 256; + float *h_X, *h_Y1, *h_Y2; + float *d_X,*d_Y; + float alpha = 2.0f; + float error_norm; + float ref_norm; + + /* Test 3 - OpenACC creates, cuBLAS shares. */ + + acc_set_device_num (0, acc_device_nvidia); + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + h_X = (float *) malloc (N * sizeof (float)); + if (h_X == 0) + { + fprintf (stderr, "malloc failed: for h_X\n"); + exit (EXIT_FAILURE); + } + + h_Y1 = (float *) malloc (N * sizeof (float)); + if (h_Y1 == 0) + { + fprintf (stderr, "malloc failed: for h_Y1\n"); + exit (EXIT_FAILURE); + } + + h_Y2 = (float *) malloc (N * sizeof (float)); + if (h_Y2 == 0) + { + fprintf (stderr, "malloc failed: for h_Y2\n"); + exit (EXIT_FAILURE); + } + + for (i = 0; i < N; i++) + { + h_X[i] = rand () / (float) RAND_MAX; + h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX; + } + + d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float)); + if (d_X == NULL) + { + fprintf (stderr, "copyin error h_X\n"); + exit (EXIT_FAILURE); + } + + d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float)); + if (d_Y == NULL) + { + fprintf (stderr, "copyin error h_Y1\n"); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + s = cublasCreate (&h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasCreate failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasSaxpy failed: %d\n", s); + exit 
(EXIT_FAILURE); + } + + context_check (pctx); + + acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float)); + + context_check (pctx); + + saxpy (N, alpha, h_X, h_Y2); + + error_norm = 0; + ref_norm = 0; + + for (i = 0; i < N; ++i) + { + float diff; + + diff = h_Y1[i] - h_Y2[i]; + error_norm += diff * diff; + ref_norm += h_Y2[i] * h_Y2[i]; + } + + error_norm = (float) sqrt ((double) error_norm); + ref_norm = (float) sqrt ((double) ref_norm); + + if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f)) + { + fprintf (stderr, "math error\n"); + exit (EXIT_FAILURE); + } + + free (h_X); + free (h_Y1); + free (h_Y2); + + acc_free (d_X); + acc_free (d_Y); + + context_check (pctx); + + s = cublasDestroy (h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasDestroy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + acc_shutdown (acc_device_nvidia); + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (pctx) + { + fprintf (stderr, "Unexpected context\n"); + exit (EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c new file mode 100644 index 00000000000..71365e8ed32 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c @@ -0,0 +1,213 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include +#include +#include +#include +#include +#include + +void +saxpy (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + { + y[i] = a * x[i] + y[i]; + } +} + +void +context_check (CUcontext ctx1) +{ + CUcontext ctx2, ctx3; + CUresult r; + + r = cuCtxGetCurrent (&ctx2); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (ctx1 != ctx2) 
+ { + fprintf (stderr, "new context established\n"); + exit (EXIT_FAILURE); + } + + ctx3 = (CUcontext) acc_get_current_cuda_context (); + + if (ctx1 != ctx3) + { + fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n"); + exit (EXIT_FAILURE); + } + + return; +} + +int +main (int argc, char **argv) +{ + cublasStatus_t s; + cublasHandle_t h; + CUcontext pctx; + CUresult r; + int i; + const int N = 256; + float *h_X, *h_Y1, *h_Y2; + float *d_X,*d_Y; + float alpha = 2.0f; + float error_norm; + float ref_norm; + + /* Test 4 - OpenACC creates, cuBLAS shares. */ + + acc_set_device_num (0, acc_device_nvidia); + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + h_X = (float *) malloc (N * sizeof (float)); + if (h_X == 0) + { + fprintf (stderr, "malloc failed: for h_X\n"); + exit (EXIT_FAILURE); + } + + h_Y1 = (float *) malloc (N * sizeof (float)); + if (h_Y1 == 0) + { + fprintf (stderr, "malloc failed: for h_Y1\n"); + exit (EXIT_FAILURE); + } + + h_Y2 = (float *) malloc (N * sizeof (float)); + if (h_Y2 == 0) + { + fprintf (stderr, "malloc failed: for h_Y2\n"); + exit (EXIT_FAILURE); + } + + for (i = 0; i < N; i++) + { + h_X[i] = rand () / (float) RAND_MAX; + h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX; + } + +#pragma acc parallel copyin (h_X[0:N]), copy (h_Y2[0:N]) copy (alpha) + { + int i; + + for (i = 0; i < N; i++) + { + h_Y2[i] = alpha * h_X[i] + h_Y2[i]; + } + } + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float)); + if (d_X == NULL) + { + fprintf (stderr, "copyin error h_Y1\n"); + exit (EXIT_FAILURE); + } + + d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float)); + if (d_Y == NULL) + { + fprintf (stderr, "copyin error h_Y1\n"); + exit (EXIT_FAILURE); + } + + s = cublasCreate (&h); + if (s != 
CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasCreate failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasSaxpy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float)); + + context_check (pctx); + + error_norm = 0; + ref_norm = 0; + + for (i = 0; i < N; ++i) + { + float diff; + + diff = h_Y1[i] - h_Y2[i]; + error_norm += diff * diff; + ref_norm += h_Y2[i] * h_Y2[i]; + } + + error_norm = (float) sqrt ((double) error_norm); + ref_norm = (float) sqrt ((double) ref_norm); + + if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f)) + { + fprintf (stderr, "math error\n"); + exit (EXIT_FAILURE); + } + + free (h_X); + free (h_Y1); + free (h_Y2); + + acc_free (d_X); + acc_free (d_Y); + + context_check (pctx); + + s = cublasDestroy (h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasDestroy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + acc_shutdown (acc_device_nvidia); + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (pctx) + { + fprintf (stderr, "Unexpected context\n"); + exit (EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-1.c new file mode 100644 index 00000000000..e7564cc12b2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-1.c @@ -0,0 +1,188 @@ +/* { dg-do run } */ + +#include +#include + +int i; + +int +is_mapped (void *p, size_t n) +{ +#if ACC_MEM_SHARED + return 1; +#else + return acc_is_present (p, n); +#endif +} + +int main(void) +{ + int j; + + i = -1; + j = -2; +#pragma acc data copyin (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, 
sizeof (j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } + if (i != 2 || j != 1) + abort (); + + i = -1; + j = -2; +#pragma acc data copyout (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + +#pragma acc parallel present (i, j) + { + i = 4; + j = 2; + } + } + if (i != 4 || j != 2) + abort (); + + i = -1; + j = -2; +#pragma acc data create (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } + if (i != 2 || j != 1) + abort (); + + i = -1; + j = -2; +#pragma acc data present_or_copyin (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } + if (i != 2 || j != 1) + abort (); + + i = -1; + j = -2; +#pragma acc data present_or_copyout (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + +#pragma acc parallel present (i, j) + { + i = 4; + j = 2; + } + } + if (i != 4 || j != 2) + abort (); + + i = -1; + j = -2; +#pragma acc data present_or_copy (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } +#if ACC_MEM_SHARED + if (i != 2 || j != 1) + abort (); +#else + if (i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; +#pragma acc data present_or_create (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } + + if (i != 2 || j != 1) + abort (); + + i = -1; + j = -2; 
+#pragma acc data copyin (i, j) + { +#pragma acc data present (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } + } + if (i != 2 || j != 1) + abort (); + + i = -1; + j = -2; +#pragma acc data + { +#if !ACC_MEM_SHARED + if (is_mapped (&i, sizeof (i)) || is_mapped (&j, sizeof (j))) + abort (); +#endif + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } + if (i != 2 || j != 1) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c new file mode 100644 index 00000000000..f867a66e5b7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c @@ -0,0 +1,162 @@ +/* { dg-do run } */ + +#include + +int +main (int argc, char **argv) +{ + int N = 128; //1024 * 1024; + float *a, *b, *c, *d, *e; + int i; + int nbytes; + + nbytes = N * sizeof (float); + + a = (float *) malloc (nbytes); + b = (float *) malloc (nbytes); + c = (float *) malloc (nbytes); + d = (float *) malloc (nbytes); + e = (float *) malloc (nbytes); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + +#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async +#pragma acc parallel async wait +#pragma acc loop + for (i = 0; i < N; i++) + b[i] = a[i]; + +#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) wait async +#pragma acc wait + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 0.0; + } + +#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async (1) +#pragma acc parallel async (1) +#pragma acc loop + for (i = 0; i < N; i++) + b[i] = a[i]; + +#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) wait (1) async (1) +#pragma acc wait (1) + + for (i = 0; i < N; i++) + { + if (a[i] 
!= 2.0) + abort (); + + if (b[i] != 2.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + } + +#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (c[0:N]) copyin (d[0:N]) copyin (N) async (1) + +#pragma acc parallel async (1) wait (1) +#pragma acc loop + for (i = 0; i < N; i++) + b[i] = (a[i] * a[i] * a[i]) / a[i]; + +#pragma acc parallel async (2) wait (1) +#pragma acc loop + for (i = 0; i < N; i++) + c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i]; + +#pragma acc parallel async (3) wait (1) +#pragma acc loop + for (i = 0; i < N; i++) + d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i]; + +#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) copyout (d[0:N]) wait (1, 2, 3) async (1) +#pragma acc wait (1) + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 9.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + e[i] = 0.0; + } + +#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (c[0:N]) copyin (d[0:N]) copyin (e[0:N]) copyin (N) async (1) + +#pragma acc parallel async (1) wait (1) + for (int ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + +#pragma acc parallel async (2) wait (1) + for (int ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + +#pragma acc parallel async (3) wait (1) + for (int ii = 0; ii < N; ii++) + d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; + +#pragma acc parallel wait (1) async (4) + for (int ii = 0; ii < N; ii++) + e[ii] = a[ii] + b[ii] + c[ii] + d[ii]; + +#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1) +#pragma acc wait (1) + + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort (); + + if (b[i] != 4.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + 
abort (); + + if (e[i] != 11.0) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c new file mode 100644 index 00000000000..747109f6136 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c @@ -0,0 +1,166 @@ +/* { dg-do run } */ + +#include + +int +main (int argc, char **argv) +{ + int N = 128; //1024 * 1024; + float *a, *b, *c, *d, *e; + int i; + int nbytes; + + nbytes = N * sizeof (float); + + a = (float *) malloc (nbytes); + b = (float *) malloc (nbytes); + c = (float *) malloc (nbytes); + d = (float *) malloc (nbytes); + e = (float *) malloc (nbytes); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + +#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async +#pragma acc parallel async wait +#pragma acc loop + for (i = 0; i < N; i++) + b[i] = a[i]; + +#pragma acc update host (a[0:N], b[0:N]) async wait +#pragma acc wait + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 0.0; + } + +#pragma acc update device (a[0:N], b[0:N]) async (1) +#pragma acc parallel async (1) +#pragma acc loop + for (i = 0; i < N; i++) + b[i] = a[i]; + +#pragma acc update host (a[0:N], b[0:N]) async (1) wait (1) +#pragma acc wait (1) + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort (); + + if (b[i] != 2.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + } + +#pragma acc update device (a[0:N]) async (1) +#pragma acc update device (b[0:N]) async (2) +#pragma acc enter data copyin (c[0:N], d[0:N]) async (3) + +#pragma acc parallel async (1) wait (1,2) +#pragma acc loop + for (i = 0; i < N; i++) + b[i] = (a[i] * a[i] * a[i]) / a[i]; + +#pragma acc parallel async (2) wait (1,3) +#pragma acc loop + for (i = 0; i < N; i++) + c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i]; + +#pragma 
acc parallel async (3) wait (1,3) +#pragma acc loop + for (i = 0; i < N; i++) + d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i]; + +#pragma acc update host (a[0:N], b[0:N], c[0:N], d[0:N]) async (1) wait (1,2,3) +#pragma acc wait (1) + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 9.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + e[i] = 0.0; + } + +#pragma acc update device (a[0:N], b[0:N], c[0:N], d[0:N]) async (1) +#pragma acc enter data copyin (e[0:N]) async (5) + +#pragma acc parallel async (1) wait (1) + for (int ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + +#pragma acc parallel async (2) wait (1) + for (int ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + +#pragma acc parallel async (3) wait (1) + for (int ii = 0; ii < N; ii++) + d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; + +#pragma acc parallel wait (1,5) async (4) + for (int ii = 0; ii < N; ii++) + e[ii] = a[ii] + b[ii] + c[ii] + d[ii]; + +#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1) +#pragma acc exit data delete (N) +#pragma acc wait (1) + + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort (); + + if (b[i] != 4.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + abort (); + + if (e[i] != 11.0) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-1.c new file mode 100644 index 00000000000..83c0a42c512 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-1.c @@ -0,0 +1,19 @@ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include + +int +main (int argc, char *argv[]) +{ + int i; + + acc_copyin (&i, sizeof i); + +#pragma acc data 
copy (i) + ++i; + + return 0; +} + +/* { dg-shouldfail "" } + { dg-output "Trying to map into device .* object when .* is already mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-2.c new file mode 100644 index 00000000000..137d8ce09da --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-2.c @@ -0,0 +1,16 @@ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +int +main (int argc, char *argv[]) +{ + int i; + +#pragma acc data present_or_copy (i) +#pragma acc data copyout (i) + ++i; + + return 0; +} + +/* { dg-shouldfail "" } + { dg-output "Trying to map into device .* object when .* is already mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-3.c new file mode 100644 index 00000000000..b993b78c5ac --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-3.c @@ -0,0 +1,17 @@ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include + +int +main (int argc, char *argv[]) +{ + int i; + +#pragma acc data present_or_copy (i) + acc_copyin (&i, sizeof i); + + return 0; +} + +/* { dg-shouldfail "" } + { dg-output "already mapped to" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-4.c new file mode 100644 index 00000000000..82523f42f1a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-4.c @@ -0,0 +1,17 @@ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include + +int +main (int argc, char *argv[]) +{ + int i; + + acc_present_or_copyin (&i, sizeof i); + acc_copyin (&i, sizeof i); + + return 0; +} + +/* { dg-shouldfail "" } + { dg-output "already mapped to" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-5.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-5.c new file mode 100644 index 00000000000..4961fe54406 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-5.c @@ -0,0 +1,17 @@ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include + +int +main (int argc, char *argv[]) +{ + int i; + +#pragma acc enter data create (i) + acc_copyin (&i, sizeof i); + + return 0; +} + +/* { dg-shouldfail "" } + { dg-output "already mapped to" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-6.c new file mode 100644 index 00000000000..77b56a93281 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-6.c @@ -0,0 +1,17 @@ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include + +int +main (int argc, char *argv[]) +{ + int i; + + acc_present_or_copyin (&i, sizeof i); +#pragma acc enter data create (i) + + return 0; +} + +/* { dg-shouldfail "" } + { dg-output "already mapped to" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-7.c new file mode 100644 index 00000000000..b08417bd6ac --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-7.c @@ -0,0 +1,17 @@ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include + +int +main (int argc, char *argv[]) +{ + int i; + +#pragma acc enter data create (i) + acc_create (&i, sizeof i); + + return 0; +} + +/* { dg-shouldfail "" } + { dg-output "already mapped to" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-8.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-8.c new file mode 100644 index 00000000000..a50f7de3a65 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-8.c @@ -0,0 +1,16 @@ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +int 
+main (int argc, char *argv[]) +{ + int i; + +#pragma acc data create (i) +#pragma acc parallel copyin (i) + ++i; + + return 0; +} + +/* { dg-shouldfail "" } + { dg-output "Trying to map into device .* object when .* is already mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c new file mode 100644 index 00000000000..e271a3770a1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c @@ -0,0 +1,32 @@ +/* { dg-do run } */ + +#include + +int main (void) +{ + void *a, *a_1, *a_2; + +#define A (void *) 0x123 + a = A; + +#pragma acc data copyout (a_1, a_2) +#pragma acc kernels deviceptr (a) + { + a_1 = a; + a_2 = &a; + } + + if (a != A) + abort (); + if (a_1 != a) + abort (); +#if ACC_MEM_SHARED + if (a_2 != &a) + abort (); +#else + if (a_2 == &a) + abort (); +#endif + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c new file mode 100644 index 00000000000..184b3554bf5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c @@ -0,0 +1,613 @@ +/* { dg-do run } */ +/* { dg-additional-options "-fno-builtin-acc_on_device" } */ + +#include +#include +#include + +#define N 32 + +int +main(int argc, char **argv) +{ + float *a, *b, *d_a, *d_b, exp, exp2; + int i; + const int one = 1; + const int zero = 0; + int n; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + d_a = (float *) acc_malloc (N * sizeof (float)); + d_b = (float *) acc_malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + a[i] = 4.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(1) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 5.0; +#else + exp = 4.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } 
+ + for (i = 0; i < N; i++) + a[i] = 16.0; + +#pragma acc parallel if(0) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 17.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 8.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(one) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 9.0; +#else + exp = 8.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 22.0; + +#pragma acc parallel if(zero) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 23.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 16.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(true) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 17.0; +#else + exp = 16.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 76.0; + +#pragma acc parallel if(false) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 77.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 22.0; + + n = 1; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 23.0; +#else + exp = 22.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for 
(i = 0; i < N; i++) + a[i] = 18.0; + + n = 0; + +#pragma acc parallel if(n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 19.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 49.0; + + n = 1; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(n + n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 50.0; +#else + exp = 49.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 38.0; + + n = 0; + +#pragma acc parallel if(n + n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 39.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 91.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(-2) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 92.0; +#else + exp = 91.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 43.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(one == 1) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 44.0; +#else + exp = 43.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 87.0; + +#pragma acc parallel if(one == 0) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 
88.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 9.0; + } + +#if ACC_MEM_SHARED + exp = 0.0; + exp2 = 0.0; +#else + acc_map_data (a, d_a, N * sizeof (float)); + acc_map_data (b, d_b, N * sizeof (float)); + exp = 3.0; + exp2 = 9.0; +#endif + +#pragma acc update device(a[0:N], b[0:N]) if(1) + + for (i = 0; i < N; i++) + { + a[i] = 0.0; + b[i] = 0.0; + } + +#pragma acc update host(a[0:N], b[0:N]) if(1) + + for (i = 0; i < N; i++) + { + if (a[i] != exp) + abort(); + + if (b[i] != exp2) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 12.0; + } + +#pragma acc update device(a[0:N], b[0:N]) if(0) + + for (i = 0; i < N; i++) + { + a[i] = 0.0; + b[i] = 0.0; + } + +#pragma acc update host(a[0:N], b[0:N]) if(1) + + for (i = 0; i < N; i++) + { + if (a[i] != exp) + abort(); + + if (b[i] != exp2) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 26.0; + b[i] = 21.0; + } + +#pragma acc update device(a[0:N], b[0:N]) if(1) + + for (i = 0; i < N; i++) + { + a[i] = 0.0; + b[i] = 0.0; + } + +#pragma acc update host(a[0:N], b[0:N]) if(0) + + for (i = 0; i < N; i++) + { + if (a[i] != 0.0) + abort(); + + if (b[i] != 0.0) + abort(); + } + +#if !ACC_MEM_SHARED + acc_unmap_data (a); + acc_unmap_data (b); +#endif + + acc_free (d_a); + acc_free (d_b); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 0.0; + } + +#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(1) +{ +#pragma acc parallel present(a[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + b[ii] = a[ii]; + } + } +} + + for (i = 0; i < N; i++) + { + if (b[i] != 4.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 8.0; + b[i] = 1.0; + } + +#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(0) +{ +#if !ACC_MEM_SHARED + if (acc_is_present (a, N * sizeof (float))) + abort (); +#endif + +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort (); +#endif +} + + for (i = 0; i < N; i++) + { + a[i] = 18.0; + b[i] = 21.0; + } + +#pragma acc data 
copyin(a[0:N]) if(1) +{ +#if !ACC_MEM_SHARED + if (!acc_is_present (a, N * sizeof (float))) + abort (); +#endif + +#pragma acc data copyout(b[0:N]) if(0) + { +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort (); +#endif + +#pragma acc data copyout(b[0:N]) if(1) + { +#pragma acc parallel present(a[0:N]) present(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + b[ii] = a[ii]; + } + } + } + +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort (); +#endif + } +} + + for (i = 0; i < N; i++) + { + if (b[i] != 18.0) + abort (); + } + +#pragma acc enter data copyin (b[0:N]) if (0) + +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort (); +#endif + +#pragma acc exit data delete (b[0:N]) if (0) + +#pragma acc enter data copyin (b[0:N]) if (1) + +#if !ACC_MEM_SHARED + if (!acc_is_present (b, N * sizeof (float))) + abort (); +#endif + +#pragma acc exit data delete (b[0:N]) if (1) + +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort (); +#endif + +#pragma acc enter data copyin (b[0:N]) if (zero) + +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort (); +#endif + +#pragma acc exit data delete (b[0:N]) if (zero) + +#pragma acc enter data copyin (b[0:N]) if (one) + +#if !ACC_MEM_SHARED + if (!acc_is_present (b, N * sizeof (float))) + abort (); +#endif + +#pragma acc exit data delete (b[0:N]) if (one) + +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort (); +#endif + +#pragma acc enter data copyin (b[0:N]) if (one == 0) + +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort (); +#endif + +#pragma acc exit data delete (b[0:N]) if (one == 0) + +#pragma acc enter data copyin (b[0:N]) if (one == 1) + +#if !ACC_MEM_SHARED + if (!acc_is_present (b, N * sizeof (float))) + abort (); +#endif + +#pragma acc exit data delete (b[0:N]) if (one == 1) + +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort 
(); +#endif + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c new file mode 100644 index 00000000000..3acfdf55ef6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c @@ -0,0 +1,184 @@ +/* { dg-do run } */ + +#include + +int i; + +int main (void) +{ + int j, v; + +#if 0 + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) copyin (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != -1 || j != -2) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) copyout (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) copy (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) create (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyin (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1) + abort (); +#if ACC_MEM_SHARED + if (i != 2 || j != 1) + abort (); +#else + if (i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyout (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* 
copyout */ present_or_copyout (v) present_or_copy (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_create (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1) + abort (); +#if ACC_MEM_SHARED + if (i != 2 || j != 1) + abort (); +#else + if (i != -1 || j != -2) + abort (); +#endif + +#if 0 + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) present (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); +#endif + +#if 0 + i = -1; + j = -2; + v = 0; +#pragma acc kernels /* copyout */ present_or_copyout (v) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); +#endif + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-empty.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-empty.c new file mode 100644 index 00000000000..a68a7cdb032 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-empty.c @@ -0,0 +1,6 @@ +int +main (void) +{ +#pragma acc kernels + ; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-1.c new file mode 100644 index 00000000000..17129d8bebd --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-1.c @@ -0,0 +1,24 @@ +/* { dg-do run } */ + +#include + +int +main (int argc, char **argv) +{ + acc_device_t devtype = acc_device_host; + +#if ACC_DEVICE_TYPE_nvidia + devtype = acc_device_nvidia; + + if (acc_get_num_devices (devtype) == 0) + return 0; +#endif + + acc_init (devtype); + + acc_init (devtype); + + return 0; +} + +/* { 
dg-shouldfail "libgomp: device already active" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-10.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-10.c new file mode 100644 index 00000000000..cf1af8c142f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-10.c @@ -0,0 +1,58 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + void *d; + acc_device_t devtype = acc_device_host; + +#if ACC_DEVICE_TYPE_nvidia + devtype = acc_device_nvidia; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; +#endif + + acc_init (devtype); + + d = acc_malloc (0); + if (d != NULL) + abort (); + + acc_free (0); + + acc_shutdown (devtype); + + acc_set_device_type (devtype); + + d = acc_malloc (0); + if (d != NULL) + abort (); + + acc_shutdown (devtype); + + acc_init (devtype); + + d = acc_malloc (1024); + if (d == NULL) + abort (); + + acc_free (d); + + acc_shutdown (devtype); + + acc_set_device_type (devtype); + + d = acc_malloc (1024); + if (d == NULL) + abort (); + + acc_free (d); + + acc_shutdown (devtype); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-11.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-11.c new file mode 100644 index 00000000000..eccdb8cbacf --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-11.c @@ -0,0 +1,23 @@ +/* Only nvptx plugin does the required error checking. 
+ { dg-do run { target openacc_nvidia_accel_selected } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 512; + void *d; + + d = acc_malloc (N); + if (d == NULL) + abort (); + + acc_free ((void *)((uintptr_t) d + (uintptr_t) (N >> 1))); + + return 0; +} + +/* { dg-shouldfail "libgomp: mem free failed 1" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-12.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-12.c new file mode 100644 index 00000000000..b46f59028b7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-12.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + memset (h, 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c new file mode 100644 index 00000000000..7098ef3549c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c @@ -0,0 +1,60 @@ +/* { dg-do run } */ + +#include +#include + +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + + if (acc_is_present (h, 1) != 1) + abort (); + + if (acc_is_present (h, N + 1) != 0) + abort (); + + if (acc_is_present (h + 1, N) != 0) + abort (); + + if (acc_is_present (h - 1, N) != 0) + abort (); + + if (acc_is_present (h - 1, N - 1) != 0) + abort (); + + if (acc_is_present (h + N, 0) != 0) + abort (); + + if (acc_is_present (h + N, N) != 0) + abort (); + + if 
(acc_is_present (0, N) != 0) + abort (); + + if (acc_is_present (h, 0) != 0) + abort (); + + acc_free (d); + + if (acc_is_present (h, 1) != 0) + abort (); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c new file mode 100644 index 00000000000..a9632f786ee --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ + +#include +#include + +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + + if (acc_is_present (h, 1) != 1) + abort (); + + if (acc_is_present (h + N - 1, 1) != 1) + abort (); + + if (acc_is_present (h - 1, 1) != 0) + abort (); + + if (acc_is_present (h + N, 1) != 0) + abort (); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 1) + abort (); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, N - i) != 1) + abort (); + } + + acc_free (d); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, N - i) != 0) + abort (); + } + + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c new file mode 100644 index 00000000000..4f6a731be1d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-16.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-16.c new file mode 100644 index 00000000000..9d277ac39f7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-16.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + (void) acc_copyin (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+256\] already mapped to \[\h+,\+256\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-17.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-17.c new file mode 100644 index 00000000000..5ff894cb33a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-17.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + acc_copyout (h, N); + + acc_copyout (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-18.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-18.c new file mode 100644 index 00000000000..2bc32637d36 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-18.c @@ -0,0 +1,34 @@ +/* { dg-do run } */ + +#include +#include + +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + + acc_free (d); + + acc_copyout (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-19.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-19.c new file mode 100644 index 00000000000..3581616d754 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-19.c @@ -0,0 +1,60 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h[N]; + + for (i = 0; i < N; i++) + { + int j; + unsigned char *p; + + h[i] = (unsigned char *) malloc (N); + p = h[i]; + + for (j = 0; j < N; j++) + { + p[j] = i; + } + + (void) acc_copyin (p, N); + } + + for (i = 0; i < N; i++) + { + memset (h[i], 0, i); + } + + for (i = 0; i < N; i++) + { + int j; + unsigned char *p; + + acc_copyout (h[i], N); + + p = h[i]; + + for (j = 0; j < N; j++) + { + if (p[j] != i) + abort (); + } + } + + for (i = 0; i < N; i++) + { + free (h[i]); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-2.c new file mode 100644 index 00000000000..9a4501f0e00 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-2.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include + +int +main (int argc, char **argv) +{ + acc_device_t devtype = acc_device_host; + +#if ACC_DEVICE_TYPE_nvidia + devtype = acc_device_nvidia; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; +#endif + + acc_init (devtype); + + acc_shutdown (devtype); + + acc_shutdown (devtype); + + return 0; +} + +/* { dg-shouldfail "libgomp: no device initialized" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c new file mode 100644 index 00000000000..b379a8f7add --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned 
char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + acc_copyout (h, N + 1); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+257\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-21.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-21.c new file mode 100644 index 00000000000..3a67400a620 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-21.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + acc_copyout (h, 0); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-22.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-22.c new file mode 100644 index 00000000000..2b86da80580 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-22.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + acc_copyout (h + 1, N - 1); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+255\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c new file mode 100644 index 00000000000..38f236dcea9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c @@ -0,0 +1,39 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h1, *h2; + + h1 = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h1[i] = 0xab;
+ } + + (void) acc_copyin (h1, N); + + h2 = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h2[i] = 0xde; + } + + (void) acc_copyin (h2, N); + + acc_copyout (h1, N + N); + + free (h1); + free (h2); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+512\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c new file mode 100644 index 00000000000..d7de8e34f4c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 1) + abort (); + } + + acc_delete (h, N); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + d = acc_create (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 1) + abort (); + } + + acc_delete (h, N); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-25.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-25.c new file mode 100644 index 00000000000..11458283b2c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-25.c @@ -0,0 +1,30 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, N); + if (!d) + abort (); + + d = acc_create (h, N); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] already mapped to \[\h+,256\]" } */ diff --git 
a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-26.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-26.c new file mode 100644 index 00000000000..a23f56e753f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-26.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, 0); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-27.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-27.c new file mode 100644 index 00000000000..074fddb2ab7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-27.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (0, N); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\)\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-28.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-28.c new file mode 100644 index 00000000000..027f7cc6460 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-28.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, N); + if (!d) + abort (); + + acc_delete (0, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-29.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-29.c new file mode 100644 index 00000000000..a66de0f538d --- /dev/null +++ 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-29.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, N); + if (!d) + abort (); + + acc_delete (h, 0); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-3.c new file mode 100644 index 00000000000..e823a4153fa --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-3.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ + +#include + +int +main (int argc, char **argv) +{ + acc_init (acc_device_host); + + acc_shutdown (acc_device_not_host); + + return 0; +} + +/* { dg-shouldfail "libgomp: device 4(4) is initialized" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-30.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-30.c new file mode 100644 index 00000000000..ce2bdb44739 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-30.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, N); + if (!d) + abort (); + + acc_delete (h, N - 2); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+254\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-31.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-31.c new file mode 100644 index 00000000000..25ce5a95977 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-31.c @@ -0,0 +1,27 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_present_or_create (h, N); + if (!d) + 
abort (); + + if (acc_is_present (h, 1) != 1) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-32.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-32.c new file mode 100644 index 00000000000..e3f87a85672 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-32.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d1, *d2; + + h = (unsigned char *) malloc (N); + + d1 = acc_present_or_create (h, N); + if (!d1) + abort (); + + d2 = acc_present_or_create (h, N); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + d2 = acc_pcreate (h, N); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-33.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-33.c new file mode 100644 index 00000000000..4abaa02e085 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-33.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d1, *d2; + + h = (unsigned char *) malloc (N); + + d1 = acc_present_or_create (h, N); + if (!d1) + abort (); + + d2 = acc_present_or_create (h, N - 2); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c new file mode 100644 index 00000000000..32d5d5117e6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d1, *d2; + + h = (unsigned char *) malloc (N); + + d1 = acc_present_or_create (h, N); + if (!d1) + abort 
(); + + d2 = acc_present_or_create (h + 2, N); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+256\] not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-35.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-35.c new file mode 100644 index 00000000000..ca8edab52a2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-35.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_present_or_create (0, N); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),+256\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-36.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-36.c new file mode 100644 index 00000000000..cb29397e14e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-36.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_present_or_create (h, 0); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-37.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-37.c new file mode 100644 index 00000000000..5a7d533a670 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-37.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; 
i++) + { + h[i] = i; + } + + d = acc_present_or_copyin (h, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-38.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-38.c new file mode 100644 index 00000000000..05d8498c1f9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-38.c @@ -0,0 +1,64 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d1, *d2; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d1 = acc_present_or_copyin (h, N); + if (!d1) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + + d2 = acc_present_or_copyin (h, N); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + memset (&h[0], 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + d2 = acc_pcopyin (h, N); + if (!d2) + abort (); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-39.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-39.c new file mode 100644 index 00000000000..db1e0b3ffd5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-39.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_present_or_copyin (0, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + 
return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),+256\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-4.c new file mode 100644 index 00000000000..060275b999b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-4.c @@ -0,0 +1,13 @@ +/* { dg-do run } */ + +#include + +int +main (int argc, char **argv) +{ + acc_init ((acc_device_t) 99); + + return 0; +} + +/* { dg-shouldfail "libgomp: device 99 is out of range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-40.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-40.c new file mode 100644 index 00000000000..cb6c422872e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-40.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ + +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_present_or_copyin (h, 0); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-41.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-41.c new file mode 100644 index 00000000000..01c5f3cfaa3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-41.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + + acc_update_device (h, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if 
(h[i] != 0xab) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c new file mode 100644 index 00000000000..d577fe37e53 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + acc_update_device (h, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != 0xab) + abort (); + } + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-43.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-43.c new file mode 100644 index 00000000000..ceeb155fe44 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-43.c @@ -0,0 +1,45 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + + acc_update_device (0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != 0xab) + abort (); + } + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c new file mode 100644 index 00000000000..0cabb0d31d9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c @@ -0,0 +1,45 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned 
char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + + acc_update_device (h, 0); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != 0xab) + abort (); + } + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-45.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-45.c new file mode 100644 index 00000000000..f9a629465b4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-45.c @@ -0,0 +1,50 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + + acc_update_device (h, N - 2); + + acc_copyout (h, N); + + for (i = 0; i < N - 2; i++) + { + if (h[i] != 0xab) + abort (); + } + + for (i = N - 2; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-46.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-46.c new file mode 100644 index 00000000000..b195725c92c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-46.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_update_self (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + 
abort (); + } + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-47.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-47.c new file mode 100644 index 00000000000..a7ff904c97c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-47.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_update_self (0, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c new file mode 100644 index 00000000000..01d3c6c8280 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_update_self (h, 0); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-49.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-49.c new file mode 100644 index 00000000000..a33324c31ba --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-49.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ 
+ +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_update_self (h, N - 2); + + for (i = 0; i < N - 2; i++) + { + if (h[i] != i) + abort (); + } + + for (i = N - 2; i < N; i++) + { + if (h[i] != 0) + abort (); + } + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-5.c new file mode 100644 index 00000000000..961a62c2b63 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-5.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + if (acc_get_device_type () == acc_device_default) + abort (); + + acc_init (acc_device_default); + + if (acc_get_device_type () == acc_device_default) + abort (); + + acc_shutdown (acc_device_default); + + if (acc_get_num_devices (acc_device_nvidia) != 0) + { + acc_init (acc_device_nvidia); + + if (acc_get_device_type () != acc_device_nvidia) + abort (); + + acc_shutdown (acc_device_nvidia); + + acc_init (acc_device_default); + + acc_set_device_type (acc_device_nvidia); + + if (acc_get_device_type () != acc_device_nvidia) + abort (); + + acc_shutdown (acc_device_nvidia); + } + + return 0; + +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-50.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-50.c new file mode 100644 index 00000000000..e8294e1af36 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-50.c @@ -0,0 +1,30 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + 
acc_map_data (h, d, N); + + if (acc_is_present (h, N) != 1) + abort (); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-51.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-51.c new file mode 100644 index 00000000000..29d28f298c0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-51.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h[N]; + void *d[N]; + + for (i = 0; i < N; i++) + { + h[i] = (unsigned char *) malloc (N); + d[i] = acc_malloc (N); + + acc_map_data (h[i], d[i], N); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h[i], N) != 1) + abort (); + } + + for (i = 0; i < N; i++) + { + acc_unmap_data (h[i]); + + if (acc_is_present (h[i], N) != 0) + abort (); + + acc_free (d[i]); + free (h[i]); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c new file mode 100644 index 00000000000..780db310fa1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (0, d, N); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[(nil),+256\]->\[\h+,\+256\] is a bad map" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c new file mode 100644 index 00000000000..657adde3b02 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + 
const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, 0, N); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+256\]->\[(nil),\+256\] is a bad map" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c new file mode 100644 index 00000000000..1f3df80bd41 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, 0); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+0\]->\[\h+,\+0\] is a bad map" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-55.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-55.c new file mode 100644 index 00000000000..286653f3339 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-55.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + int i; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + for (i = 0; i < N; i++) + { + acc_map_data ((void *)((uintptr_t) h + (uintptr_t) i), + (void *)((uintptr_t) d + (uintptr_t) i), 1); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 1) + abort (); + } + + for (i = 0; i < N; i++) + { + acc_unmap_data (h + i); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-56.c
b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-56.c new file mode 100644 index 00000000000..e3f5a80518f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-56.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, N >> 1); + + if (acc_is_present (h, 1) != 1) + abort (); + + if (acc_is_present (h + (N >> 1), 1) != 0) + abort (); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-57.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-57.c new file mode 100644 index 00000000000..f9043a445df --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-57.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, N); + + acc_unmap_data (d); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \h+ is not a mapped block" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-58.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-58.c new file mode 100644 index 00000000000..9d6e27d89e9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-58.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, N); + + acc_unmap_data (0); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \(nil\) is not a mapped block" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-59.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-59.c new file mode 100644 index 00000000000..2f087aedccb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-59.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, N); + + for (i = 0; i < N; i++) + { + if (acc_hostptr ((void *)((uintptr_t) d + (uintptr_t) i)) != + (void *)((uintptr_t) h + (uintptr_t) i)) + abort (); + } + + for (i = 0; i < N; i++) + { + if (acc_deviceptr ((void *)((uintptr_t) h + (uintptr_t) i)) != + (void *)((uintptr_t) d + (uintptr_t) i)) + abort (); + } + + acc_unmap_data (h); + + for (i = 0; i < N; i++) + { + if (acc_hostptr ((void *)((uintptr_t) d + (uintptr_t) i)) != 0) + abort (); + } + + for (i = 0; i < N; i++) + { + if (acc_deviceptr (h + i) != 0) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-6.c new file mode 100644 index 00000000000..afdd480cb71 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-6.c @@ -0,0 +1,39 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + int devnum; + + if (acc_get_device_type () == acc_device_default) + abort (); + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + acc_set_device_type (acc_device_nvidia); + + if (acc_get_device_type () != acc_device_nvidia) + abort (); + + acc_shutdown (acc_device_nvidia); + + acc_set_device_type (acc_device_nvidia); + + if (acc_get_device_type () != acc_device_nvidia) + abort (); + + devnum = acc_get_num_devices (acc_device_host); + if (devnum != 1) + abort (); + + acc_shutdown (acc_device_nvidia); + + if (acc_get_device_type () == 
acc_device_default) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-60.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-60.c new file mode 100644 index 00000000000..ccae728e3dc --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-60.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + memset (&h[0], 0, N); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-61.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-61.c new file mode 100644 index 00000000000..ce66ced28e8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-61.c @@ -0,0 +1,70 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h[N]; + void *d[N]; + + for (i = 0; i < N; i++) + { + int j; + unsigned char *p; + + h[i] = (unsigned char *) malloc (N); + + p = h[i]; + + for (j = 0; j < N; j++) + { + p[j] = i; + } + + d[i] = acc_malloc (N); + + acc_memcpy_to_device (d[i], h[i], N); + + for (j = 0; j < N; j++) + { + if (acc_is_present (h[i] + j, 1) != 0) + abort (); + } + } + + for (i = 0; i < N; i++) + { + int j; + unsigned char *p; + + memset (h[i], 0, N); + + acc_memcpy_from_device (h[i], d[i], N); + + p = h[i]; + + for 
(j = 0; j < N; j++) + { + if (p[j] != i) + abort (); + } + + for (j = 0; j < N; j++) + { + if (acc_is_present (h[i] + j, 1) != 0) + abort (); + } + + acc_free (d[i]); + + free (h[i]); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-62.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-62.c new file mode 100644 index 00000000000..e6178e233ed --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-62.c @@ -0,0 +1,49 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + acc_init (acc_device_nvidia); + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_to_device (d, h, N << 1); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid size" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-63.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-63.c new file mode 100644 index 00000000000..ca237ec1b60 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-63.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (0, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid device address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-64.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-64.c new file mode 100644 index 00000000000..850fd2e3f91 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-64.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, 0, N); + + memset (&h[0], 0, N); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid host address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-65.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-65.c new file mode 100644 index 00000000000..26c8cefdc42 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-65.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, d, N); + + memset (&h[0], 0, N); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid host or device address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-66.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-66.c new file mode 100644 index 00000000000..398dc2a4cef --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-66.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + 
unsigned char *h; + void *d; + + acc_init (acc_device_default); + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_to_device (d, h, 0); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + acc_shutdown (acc_device_default); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-67.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-67.c new file mode 100644 index 00000000000..01b8b2d89ae --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-67.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_from_device (0, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid host address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-68.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-68.c new file mode 100644 index 00000000000..3ff5bd7bcf7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-68.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_from_device (h, 0, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 
0; +} + +/* { dg-shouldfail "libgomp: invalid device address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c new file mode 100644 index 00000000000..5462f125352 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c @@ -0,0 +1,124 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + stream = (CUstream) acc_get_cuda_stream (0); + if (stream != NULL) + abort 
(); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + if (acc_async_test (0) != 0) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + + sleep (1); + + if (acc_async_test (0) != 1) + { + fprintf (stderr, "found asynchronous operation still running\n"); + abort (); + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-7.c new file mode 100644 index 00000000000..e78734b3011 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-7.c @@ -0,0 +1,18 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + if (acc_get_num_devices (acc_device_none) != 0) + abort (); + + if (acc_get_num_devices (acc_device_host) == 0) + abort (); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c new file mode 100644 index 00000000000..912b266ec39 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c @@ -0,0 +1,136 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + const int N = 10; + int i; + CUstream streams[N]; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init 
(acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + for (i = 0; i < N; i++) + { + streams[i] = (CUstream) acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + if (acc_async_test (i) != 0) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + } + + sleep ((int) (dtime / 1000.0f) + 1); + + for (i = 0; i < N; i++) + { + if (acc_async_test (i) != 1) + { + fprintf (stderr, "found asynchronous 
operation still running\n"); + abort (); + } + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c new file mode 100644 index 00000000000..a04537916f6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c @@ -0,0 +1,119 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void 
*) &d_a; + kargs[1] = (void *) &dticks; + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + acc_set_cuda_stream (0, stream); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + if (acc_async_test (1) != 0) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + + sleep ((int) (dtime / 1000.0f) + 1); + + if (acc_async_test (1) != 1) + { + fprintf (stderr, "found asynchronous operation still running\n"); + abort (); + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-shouldfail "libgomp: unknown async \d" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c new file mode 100644 index 00000000000..e383ba04d69 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c @@ -0,0 +1,121 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); 
+ if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + if (acc_async_test_all () != 0) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + + sleep ((int) (dtime / 1000.f) + 1); + + if (acc_async_test_all () != 1) + { + fprintf (stderr, "found asynchronous operation still running\n"); + abort (); + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c new file mode 100644 index 00000000000..43a8b7e6395 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c @@ -0,0 +1,134 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + 
CUmodule module; + CUresult r; + const int N = 10; + int i; + CUstream streams[N]; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + for (i = 0; i < N; i++) + { + streams[i] = (CUstream) acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + } + + if (acc_async_test_all () != 0) + { + fprintf 
(stderr, "asynchronous operation not running\n"); + abort (); + } + + sleep ((int) (dtime / 1000.0f) + 1); + + if (acc_async_test_all () != 1) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c new file mode 100644 index 00000000000..0726ee420c3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c @@ -0,0 +1,139 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 
200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + stream = (CUstream) acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + init_timers (1); + + start_timer (0); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + acc_wait (0); + + atime = stop_timer (0); + + if (atime < dtime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + start_timer (0); + + acc_wait (0); + + atime = stop_timer (0); + + if (0.010 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c new file mode 100644 index 00000000000..19422118748 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c @@ -0,0 +1,141 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + int N; + int i; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime, hitime, lotime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num 
(acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + N = nprocs; + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + stream = (CUstream) acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + acc_wait (0); + } + + atime = stop_timer (0); + + hitime = dtime * N; + hitime += hitime * 0.02; + + lotime = dtime * N; + lotime -= lotime * 0.02; + + if (atime > hitime || atime < lotime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); 
+ acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c new file mode 100644 index 00000000000..11d9d621f87 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c @@ -0,0 +1,147 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + int N; + int i; + CUstream *streams; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime, hitime, lotime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + N = nprocs; + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + streams = 
(CUstream *) malloc (N * sizeof (void *)); + + for (i = 0; i < N; i++) + { + streams[i] = (CUstream) acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + acc_wait (i); + } + + atime = stop_timer (0); + + hitime = dtime * N; + hitime += hitime * 0.02; + + lotime = dtime * N; + lotime -= lotime * 0.02; + + if (atime > hitime || atime < lotime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (streams); + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c new file mode 100644 index 00000000000..e47212b2139 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c @@ -0,0 +1,135 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, 
"cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + acc_set_cuda_stream (0, stream); + + init_timers (1); + + start_timer (0); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + acc_wait (1); + + atime = stop_timer (0); + + if (atime < dtime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + start_timer (0); + + acc_wait (1); + + atime = stop_timer (0); + + if (0.010 < atime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-shouldfail "libgomp: unknown async \d" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c new file mode 100644 index 00000000000..4f58fb23cfe --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c @@ -0,0 +1,140 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + stream = (CUstream) acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf 
(stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + init_timers (1); + + start_timer (0); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + acc_wait_all (); + + atime = stop_timer (0); + + if (atime < dtime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + start_timer (0); + + acc_wait_all (); + + atime = stop_timer (0); + + if (0.010 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c new file mode 100644 index 00000000000..ef3df13ebc9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c @@ -0,0 +1,167 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + int N; + int i; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime, hitime, lotime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + devnum = 2; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, 
CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + N = nprocs; + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (1, stream)) + abort (); + + stream = (CUstream) acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + } + + acc_wait_async (0, 1); + + if (acc_async_test (0) != 0) + abort (); + + if (acc_async_test (1) != 0) + abort (); + + acc_wait (1); + + atime = stop_timer (0); + + if (acc_async_test (0) != 1) + abort (); + + if (acc_async_test (1) != 1) + abort (); + + hitime = dtime * N; + hitime += hitime * 0.02; + + lotime = dtime * N; + lotime -= lotime * 0.02; + + if (atime > hitime || atime < lotime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + acc_unmap_data (a); + + 
fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c new file mode 100644 index 00000000000..0b5ec24b28e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c @@ -0,0 +1,132 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + CUstream stream; + int N; + int i; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + N = nprocs; + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + r = 
cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + acc_set_cuda_stream (1, stream); + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + } + + acc_wait_async (1, 1); + + acc_wait (1); + + atime = stop_timer (0); + + if (atime < dtime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-shouldfail "libgomp: identical parameters" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c new file mode 100644 index 00000000000..d5f18f00319 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c @@ -0,0 +1,211 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + int N; + int i; + CUstream *streams, stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = 
cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 500.0; + + dticks = (unsigned long) (dtime * clkrate); + + N = nprocs; + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + streams = (CUstream *) malloc (N * sizeof (void *)); + + for (i = 0; i < N; i++) + { + streams[i] = (CUstream) acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + stream = (CUstream) acc_get_cuda_stream (N); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (N, stream)) + abort (); + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + } + + acc_wait_all_async (N); + + for (i = 0; i <= N; i++) + { + if (acc_async_test (i) != 0) + abort (); + } + + acc_wait (N); + + for (i = 0; i <= N; i++) + { + if (acc_async_test (i) != 1) + abort (); + } + + atime = stop_timer (0); + + if (atime < dtime) + { + fprintf 
(stderr, "actual time < delay time\n"); + abort (); + } + + start_timer (0); + + stream = (CUstream) acc_get_cuda_stream (N + 1); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (N + 1, stream)) + abort (); + + acc_wait_all_async (N + 1); + + acc_wait (N + 1); + + atime = stop_timer (0); + + if (0.10 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + start_timer (0); + + acc_wait_all_async (N); + + acc_wait (N); + + atime = stop_timer (0); + + if (0.10 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (streams); + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c new file mode 100644 index 00000000000..be30a7f28ac --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c @@ -0,0 +1,144 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay2; + CUmodule module; + CUresult r; + int N; + int i; + CUstream *streams; + unsigned long **a, **d_a, *tid, ticks; + int nbytes; + void *kargs[3]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + 
r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay2, module, "delay2"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = sizeof (int); + + ticks = (unsigned long) (200.0 * clkrate); + + N = nprocs; + + streams = (CUstream *) malloc (N * sizeof (void *)); + + a = (unsigned long **) malloc (N * sizeof (unsigned long *)); + d_a = (unsigned long **) malloc (N * sizeof (unsigned long *)); + tid = (unsigned long *) malloc (N * sizeof (unsigned long)); + + for (i = 0; i < N; i++) + { + a[i] = (unsigned long *) malloc (sizeof (unsigned long)); + *a[i] = N; + d_a[i] = (unsigned long *) acc_malloc (nbytes); + tid[i] = i; + + acc_map_data (a[i], d_a[i], nbytes); + + streams[i] = (CUstream) acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + for (i = 0; i < N; i++) + { + kargs[0] = (void *) &d_a[i]; + kargs[1] = (void *) &ticks; + kargs[2] = (void *) &tid[i]; + + r = cuLaunchKernel (delay2, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + ticks = (unsigned long) (50.0 * clkrate); + } + + acc_wait_all_async (0); + + for (i = 0; i < N; i++) + { + acc_copyout (a[i], nbytes); + if (*a[i] != i) + abort (); + } + + free (streams); + + for (i = 0; i < N; i++) + { + free (a[i]); + } + + free (a); + free (d_a); + free (tid); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { 
dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c new file mode 100644 index 00000000000..1c2e52b4c5f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c @@ -0,0 +1,58 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + float atime; + CUstream stream; + CUresult r; + + acc_init (acc_device_nvidia); + + (void) acc_get_device_num (acc_device_nvidia); + + init_timers (1); + + stream = (CUstream) acc_get_cuda_stream (0); /* Async value 0 must not have a CUDA stream yet. */ + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) /* Attach the new stream to async value 0; a zero return is treated as failure. */ + abort (); + + start_timer (0); + + acc_wait_all_async (0); /* No work was queued above, so the two timed waits are expected to return promptly. */ + + acc_wait (0); + + atime = stop_timer (0); + + if (0.010 < atime) /* Limit is in stop_timer's units (timer.h is not visible here; presumably milliseconds -- confirm). */ + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + fini_timers (); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c new file mode 100644 index 00000000000..786b908f755 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c @@ -0,0 +1,66 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 100; + int i; + CUstream *streams; + CUstream s; + CUresult r; + + acc_init (acc_device_nvidia); + + (void) acc_get_device_num (acc_device_nvidia); + + streams = (CUstream *) malloc (N * sizeof (void *)); + + for (i = 0; i < N; i++) + { + streams[i] = (CUstream) acc_get_cuda_stream (i); + if (streams[i] !=
NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + for (i = 0; i < N; i++) + { + int j; + int cnt; + + cnt = 0; + + s = streams[i]; + + for (j = 0; j < N; j++) + { + if (s == streams[j]) + cnt++; + } + + if (cnt != 1) /* Every stream handle must occur exactly once in the table. */ + abort (); + } + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c new file mode 100644 index 00000000000..cf925a7b002 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c @@ -0,0 +1,52 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 100; + int i; + CUstream *streams; + CUstream s; + CUresult r; + + acc_init (acc_device_nvidia); + + (void) acc_get_device_num (acc_device_nvidia); + + streams = (CUstream *) malloc (N * sizeof (void *)); + + for (i = 0; i < N; i++) + { + streams[i] = (CUstream) acc_get_cuda_stream (i); /* No stream may exist for async value i before one is set. */ + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) /* Registering a freshly created stream must succeed. */ + abort (); + } + + s = NULL; /* Deliberately pass a null stream handle next. */ + + if (acc_set_cuda_stream (N + 1, s) != 0) /* The call is required to report failure, i.e. return 0. */ + abort (); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-86.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-86.c new file mode 100644 index 00000000000..b8a8ee94a58 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-86.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ + +#include +#include +#include + +int
+main (int argc, char **argv) +{ + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; /* Nothing to check when no NVIDIA device is reported. */ + + if (acc_get_current_cuda_device () != 0) /* Must be null before any acc_init. */ + abort (); + + acc_init (acc_device_host); + + if (acc_get_current_cuda_device () != 0) /* Still null while only the host device is initialized. */ + abort (); + + acc_shutdown (acc_device_host); + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + if (acc_get_current_cuda_device () != 0) + abort (); + + acc_init (acc_device_nvidia); + + if (acc_get_current_cuda_device () == 0) /* Must be non-null once the NVIDIA device is initialized. */ + abort (); + + acc_shutdown (acc_device_nvidia); + + if (acc_get_current_cuda_device () != 0) /* And null again after shutdown. */ + abort (); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-87.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-87.c new file mode 100644 index 00000000000..147d443b54d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-87.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; /* Nothing to check when no NVIDIA device is reported. */ + + if (acc_get_current_cuda_context () != 0) /* Must be null before any acc_init. */ + abort (); + + acc_init (acc_device_host); + + if (acc_get_current_cuda_context () != 0) /* Still null while only the host device is initialized. */ + abort (); + + acc_shutdown (acc_device_host); + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + if (acc_get_current_cuda_context () != 0) + abort (); + + acc_init (acc_device_nvidia); + + if (acc_get_current_cuda_context () == 0) /* Must be non-null once the NVIDIA device is initialized. */ + abort (); + + acc_shutdown (acc_device_nvidia); + + if (acc_get_current_cuda_context () != 0) /* And null again after shutdown. */ + abort (); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-88.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-88.c new file mode 100644 index 00000000000..10f4ad8664a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-88.c @@ -0,0 +1,111 @@ +/* { dg-do run } */ + +#include +#include +#include +#include +#include +#include + +unsigned char *x; +void *d_x; +const int N = 256; + +static
void * +test (void *arg) +{ + int i; + + if (acc_get_current_cuda_context () != NULL) /* This thread is expected to start without a current CUDA context. */ + abort (); + + if (acc_is_present (x, N) != 1) /* The mapping created by main must be visible from this thread. */ + abort (); + + memset (x, 0, N); /* Clear the host copy so the check below only passes if acc_copyout refills it. */ + + acc_copyout (x, N); + + for (i = 0; i < N; i++) + { + if (x[i] != i) + abort (); + + x[i] = N - i - 1; /* New pattern that main verifies after joining. */ + } + + d_x = acc_copyin (x, N); + + return 0; +} + +int +main (int argc, char **argv) +{ + const int nthreads = 1; /* Only one worker thread is started. */ + int i; + pthread_attr_t attr; + pthread_t *tid; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; /* Nothing to check when no NVIDIA device is reported. */ + + acc_init (acc_device_nvidia); + + x = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + x[i] = i; + } + + d_x = acc_copyin (x, N); + + if (acc_is_present (x, N) != 1) + abort (); + + if (pthread_attr_init (&attr) != 0) + perror ("pthread_attr_init failed"); + + tid = (pthread_t *) malloc (nthreads * sizeof (pthread_t)); + + for (i = 0; i < nthreads; i++) + { + if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i)) + != 0) + perror ("pthread_create failed"); + } + + if (pthread_attr_destroy (&attr) != 0) + perror ("pthread_attr_destroy failed"); + + for (i = 0; i < nthreads; i++) + { + void *res; + + if (pthread_join (tid[i], &res) != 0) + perror ("pthread join failed"); + } + + if (acc_is_present (x, N) != 1) /* The worker's acc_copyin must have left x mapped. */ + abort (); + + memset (x, 0, N); /* As in test: clear first so the comparison depends on acc_copyout. */ + + acc_copyout (x, N); + + for (i = 0; i < N; i++) + { + if (x[i] != N - i - 1) + abort (); + } + + if (acc_is_present (x, N) != 0) /* x must no longer be mapped after the acc_copyout above. */ + abort (); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-89.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-89.c new file mode 100644 index 00000000000..061c4099c2d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-89.c @@ -0,0 +1,118 @@ +/* { dg-do run } */ + +#include +#include +#include +#include +#include +#include +#include + +unsigned char **x; +void **d_x; +const int N = 16; +const int NTHREADS = 32; + +static void * +test (void *arg) +{ +
int i; + int tid; + unsigned char *p; + int devnum; + + tid = (int) (long) arg; + + devnum = acc_get_device_num (acc_device_nvidia); + acc_set_device_num (devnum, acc_device_nvidia); + + if (acc_get_current_cuda_context () == NULL) /* After acc_set_device_num this thread must have a CUDA context. */ + abort (); + + p = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + p[i] = tid; + } + + x[tid] = p; + + d_x[tid] = acc_copyin (p, N); + + return 0; +} + +int +main (int argc, char **argv) +{ + int i; + pthread_attr_t attr; + pthread_t *tid; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; /* Nothing to check when no NVIDIA device is reported. */ + + acc_init (acc_device_nvidia); + + x = (unsigned char **) malloc (NTHREADS * sizeof (unsigned char *)); /* One host-buffer pointer per thread; size by element type, not by payload length N. */ + d_x = (void **) malloc (NTHREADS * sizeof (void *)); /* One device pointer per thread. */ + + if (pthread_attr_init (&attr) != 0) + perror ("pthread_attr_init failed"); + + tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t)); + + for (i = 0; i < NTHREADS; i++) + { + if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i)) + != 0) + perror ("pthread_create failed"); + } + + if (pthread_attr_destroy (&attr) != 0) + perror ("pthread_attr_destroy failed"); + + for (i = 0; i < NTHREADS; i++) + { + void *res; + + if (pthread_join (tid[i], &res) != 0) + perror ("pthread join failed"); + } + + for (i = 0; i < NTHREADS; i++) + { + if (acc_is_present (x[i], N) != 1) /* Each thread's acc_copyin must have left its buffer mapped. */ + abort (); + } + + for (i = 0; i < NTHREADS; i++) + { + memset (x[i], 0, N); /* Clear the host copy so the check below depends on acc_copyout. */ + acc_copyout (x[i], N); + } + + for (i = 0; i < NTHREADS; i++) + { + unsigned char *p; + int j; + + p = x[i]; + + for (j = 0; j < N; j++) + { + if (p[j] != i) /* Thread i filled its buffer with its own id. */ + abort (); + } + + if (acc_is_present (x[i], N) != 0) + abort (); + } + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-9.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-9.c new file mode 100644 index 00000000000..84045dbe328 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-9.c @@ -0,0 +1,70 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char
**argv) +{ + int i; + int num_devices; + int devnum; + acc_device_t devtype = acc_device_host; + +#if ACC_DEVICE_TYPE_nvidia + devtype = acc_device_nvidia; +#endif + + num_devices = acc_get_num_devices (devtype); + if (num_devices == 0) + return 0; + + acc_init (devtype); + + for (i = 0; i < num_devices; i++) + { + acc_set_device_num (i, devtype); + devnum = acc_get_device_num (devtype); + if (devnum != i) + abort (); + } + + acc_shutdown (devtype); + + num_devices = acc_get_num_devices (devtype); + if (num_devices == 0) + abort (); + + for (i = 0; i < num_devices; i++) + { + acc_set_device_num (i, devtype); + devnum = acc_get_device_num (devtype); + if (devnum != i) + abort (); + } + + acc_shutdown (devtype); + + acc_init (devtype); + + acc_set_device_num (0, devtype); + + devnum = acc_get_device_num (devtype); + if (devnum != 0) + abort (); + + if (num_devices > 1) + { + acc_set_device_num (1, (acc_device_t) 0); + + devnum = acc_get_device_num (devtype); + if (devnum != 0) + abort (); + } + + acc_shutdown (devtype); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-90.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-90.c new file mode 100644 index 00000000000..d17755bf3eb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-90.c @@ -0,0 +1,137 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned char **x; +void **d_x; +const int N = 16; +const int NTHREADS = 32; + +static void * +test (void *arg) +{ + int i; + int tid; + unsigned char *p; + int devnum; + + tid = (int) (long) arg; + + devnum = acc_get_device_num (acc_device_nvidia); + acc_set_device_num (devnum, acc_device_nvidia); + + if (acc_get_current_cuda_context () == NULL) + abort (); + + p = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + p[i] = tid; + } + + x[tid] 
= p; + + d_x[tid] = acc_copyin (p, N); + + acc_wait_all (); + + return 0; +} + +int +main (int argc, char **argv) +{ + int i; + pthread_attr_t attr; + pthread_t *tid; + CUresult r; + CUstream s; + + acc_init (acc_device_nvidia); + + x = (unsigned char **) malloc (NTHREADS * sizeof (unsigned char *)); /* One host-buffer pointer per thread; size by element type, not by payload length N. */ + d_x = (void **) malloc (NTHREADS * sizeof (void *)); /* One device pointer per thread. */ + + if (pthread_attr_init (&attr) != 0) + perror ("pthread_attr_init failed"); + + tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t)); + + r = cuStreamCreate (&s, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, s)) /* Register the stream for async value 0 before the workers start. */ + abort (); + + for (i = 0; i < NTHREADS; i++) + { + if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i)) + != 0) + perror ("pthread_create failed"); + } + + if (pthread_attr_destroy (&attr) != 0) + perror ("pthread_attr_destroy failed"); + + for (i = 0; i < NTHREADS; i++) + { + void *res; + + if (pthread_join (tid[i], &res) != 0) + perror ("pthread join failed"); + } + + + for (i = 0; i < NTHREADS; i++) + { + if (acc_is_present (x[i], N) != 1) /* Each thread's acc_copyin must have left its buffer mapped. */ + abort (); + } + + acc_get_cuda_stream (1); + + for (i = 0; i < NTHREADS; i++) + { + memset (x[i], 0, N); /* Clear the host copy so the check below depends on acc_copyout. */ + acc_copyout (x[i], N); + } + + acc_wait_all (); + + for (i = 0; i < NTHREADS; i++) + { + unsigned char *p; + int j; + + p = x[i]; + + for (j = 0; j < N; j++) + { + if (p[j] != i) /* Thread i filled its buffer with its own id. */ + abort (); + } + + if (acc_is_present (x[i], N) != 0) + abort (); + } + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-91.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-91.c new file mode 100644 index 00000000000..e00ef4f7206 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-91.c @@ -0,0 +1,84 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include + +int +main
(int argc, char **argv) +{ + const int N = 1024 * 1024; + int i; + unsigned char *h; + void *d; + float async, sync; + struct timeval start, stop; + CUresult r; + CUstream s; + + acc_init (acc_device_nvidia); + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_map_data (h, d, N); + + gettimeofday (&start, NULL); + + for (i = 0; i < 100; i++) + { +#pragma acc update device(h[0:N]) + } + + gettimeofday (&stop, NULL); + + sync = (float) (stop.tv_sec - start.tv_sec); + sync += (float) ((stop.tv_usec - start.tv_usec) / 1000000.0); + + gettimeofday (&start, NULL); + + r = cuStreamCreate (&s, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, s)) + abort (); + + for (i = 0; i < 100; i++) + { +#pragma acc update device(h[0:N]) async(0) + } + + acc_wait_all (); + + gettimeofday (&stop, NULL); + + async = (float) (stop.tv_sec - start.tv_sec); + async += (float) ((stop.tv_usec - start.tv_usec) / 1000000.0); + + if (async > (sync * 1.5)) + abort (); + + acc_free (d); + + free (h); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-92.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-92.c new file mode 100644 index 00000000000..18193e0e8f9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-92.c @@ -0,0 +1,112 @@ +/* { dg-do run } */ + +#include +#include +#include +#include +#include +#include + +unsigned char **x; +void **d_x; +const int N = 32; +const int NTHREADS = 32; + +static void * +test (void *arg) +{ + int i; + int tid; + unsigned char *p; + int devnum; + + tid = (int) (long) arg; + + devnum = acc_get_device_num (acc_device_nvidia); + acc_set_device_num (devnum, acc_device_nvidia); + + if (acc_get_current_cuda_context () == NULL) + abort (); + + acc_copyout (x[tid], N); + + p = x[tid]; + + for (i 
= 0; i < N; i++) + { + if (p[i] != i) + abort (); + } + + return 0; +} + +int +main (int argc, char **argv) +{ + int i; + pthread_attr_t attr; + pthread_t *tid; + unsigned char *p; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; /* Nothing to check when no NVIDIA device is reported. */ + + acc_init (acc_device_nvidia); + + x = (unsigned char **) malloc (NTHREADS * sizeof (unsigned char *)); /* One host-buffer pointer per thread; size by element type, not by payload length N. */ + d_x = (void **) malloc (NTHREADS * sizeof (void *)); /* One device pointer per thread. */ + + for (i = 0; i < NTHREADS; i++) /* One buffer per worker thread: the bound is the thread count, which only happens to equal N. */ + { + int j; + + p = (unsigned char *) malloc (N); + + x[i] = p; + + for (j = 0; j < N; j++) + { + p[j] = j; + } + + d_x[i] = acc_copyin (p, N); + } + + if (pthread_attr_init (&attr) != 0) + perror ("pthread_attr_init failed"); + + tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t)); + + acc_get_cuda_stream (1); + + for (i = 0; i < NTHREADS; i++) + { + if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i)) + != 0) + perror ("pthread_create failed"); + } + + if (pthread_attr_destroy (&attr) != 0) + perror ("pthread_attr_destroy failed"); + + for (i = 0; i < NTHREADS; i++) + { + void *res; + + if (pthread_join (tid[i], &res) != 0) + perror ("pthread join failed"); + } + + for (i = 0; i < NTHREADS; i++) + { + if (acc_is_present (x[i], N) != 0) /* Each worker's acc_copyout must have removed its mapping. */ + abort (); + } + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/nested-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/nested-1.c new file mode 100644 index 00000000000..ededf2ba5b7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/nested-1.c @@ -0,0 +1,680 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + int N = 8; + float *a, *b, *c, *d; + int i; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + c = (float *) malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + +#pragma acc data copyin
(a[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 3.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 1.0; + } + +#pragma acc data copyin (a[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 5.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + + d = (float *) acc_copyin (&a[0], N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc data present_or_copyin (a[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc data copyin (a[0:N]) present_or_copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 2.0; + } + + d = (float *) acc_copyin (&b[0], N * sizeof (float)); + +#pragma acc data copyin (a[0:N]) present_or_copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i 
= 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 2.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + acc_free (d); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 4.0; + } + +#pragma acc data copy (a[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = a[ii] + 2; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 6.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 7.0; + } + +#pragma acc data present_or_copy (a[0:N]) present_or_copy (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = b[ii] + 2; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 9.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 7.0; + } + + d = (float *) acc_copyin (&a[0], N * sizeof (float)); + d = (float *) acc_copyin (&b[0], N * sizeof (float)); + +#pragma acc data present_or_copy (a[0:N]) present_or_copy (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = b[ii] + 2; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 7.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + d = (float *) acc_deviceptr (&a[0]); + acc_unmap_data (&a[0]); + acc_free (d); + + d = (float *) 
acc_deviceptr (&b[0]); + acc_unmap_data (&b[0]); + acc_free (d); + + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 7.0; + } + +#pragma acc data copyin (a[0:N]) create (c[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + +#pragma acc data copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 5.0; + } + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc data copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort (); + + if (b[i] != 2.0) + abort (); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (acc_is_present (b, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + d = (float *) acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + + d = (float *) acc_malloc 
(N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc data copyin (a[0:N]) present (c[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (acc_is_present (b, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + acc_unmap_data (c); + + if (acc_is_present (c, (N * sizeof (float)))) + abort (); + + acc_free (d); + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (b, d, N * sizeof (float)); + + if (!acc_is_present (b, (N * sizeof (float)))) + abort (); + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (a, d, N * sizeof (float)); + + if (!acc_is_present (a, (N * sizeof (float)))) + abort (); + +#pragma acc data present (a[0:N]) present (c[0:N]) present (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = 1.0; + c[ii] = 2.0; + b[ii] = 4.0; + } + } + } + + if (!acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (b, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + acc_copyout (b, N * sizeof (float)); + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } + + d = (float *) acc_deviceptr (a); + + acc_unmap_data (a); + + acc_free (d); + + d = (float *) acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 6.0; + } + + d = (float *) acc_malloc (N * sizeof (float)); + +#pragma acc parallel copyin (a[0:N]) deviceptr (d) copyout (b[0:N]) + { 
+ int ii; + + for (ii = 0; ii < N; ii++) + { + d[ii] = a[ii]; + b[ii] = d[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (acc_is_present (b, (N * sizeof (float)))) + abort (); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + + d = (float *) acc_copyin (&a[0], N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc data pcopyin (a[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc data copyin (a[0:N]) pcopyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 7.0; + } + +#pragma acc data copyin (a[0:N]) pcreate (c[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/nested-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/nested-2.c new file mode 100644 
index 00000000000..c16459826af --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/nested-2.c @@ -0,0 +1,141 @@ +/* { dg-do run } */ + +#include + +int +main (int argc, char *argv[]) +{ +#define N 10 + char a[N]; + int i; + + for (i = 0; i < N; ++i) + a[i] = 0; + +#pragma acc data copy (a) + { +#pragma acc parallel present (a) + { + int j; + + for (j = 0; j < N; ++j) + a[j] = j; + } + } + + for (i = 0; i < N; ++i) + { + if (a[i] != i) + abort (); + } + + for (i = 0; i < N; ++i) + a[i] = 0; + +#pragma acc data copy (a) + { +#pragma acc kernels present (a) + { + int j; + + for (j = 0; j < N; ++j) + a[j] = j; + } + } + + for (i = 0; i < N; ++i) + { + if (a[i] != i) + abort (); + } + + for (i = 0; i < N; ++i) + a[i] = 0; + +#pragma acc data copy (a) + { +#pragma acc data present (a) + { +#pragma acc parallel present (a) + { + int j; + + for (j = 0; j < N; ++j) + a[j] = j; + } + } + } + + for (i = 0; i < N; ++i) + { + if (a[i] != i) + abort (); + } + +#pragma acc data copy (a) + { +#pragma acc data present (a) + { +#pragma acc kernels present (a) + { + int j; + + for (j = 0; j < N; ++j) + a[j] = j; + } + } + } + + for (i = 0; i < N; ++i) + { + if (a[i] != i) + abort (); + } + + for (i = 0; i < N; ++i) + a[i] = 0; + +#pragma acc enter data copyin (a) + +#pragma acc data present (a) + { +#pragma acc parallel present (a) + { + int j; + + for (j = 0; j < N; ++j) + a[j] = j; + } + } + +#pragma acc exit data copyout (a) + + for (i = 0; i < N; ++i) + { + if (a[i] != i) + abort (); + } + +#pragma acc enter data copyin (a) + +#pragma acc data present (a) + { +#pragma acc kernels present (a) + { + int j; + + for (j = 0; j < N; ++j) + a[j] = j; + } + } + +#pragma acc exit data copyout (a) + + for (i = 0; i < N; ++i) + { + if (a[i] != i) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/offset-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/offset-1.c new file mode 100644 index 00000000000..0bae23a46f7 --- /dev/null +++ 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/offset-1.c @@ -0,0 +1,97 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include +#include +#include + +int +main(int argc, char **argv) +{ + int N = 8; + float *a, *b; + int i; + + a = (float *) malloc(N * sizeof (float)); + b = (float *) malloc(N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 5.0; + } + +#pragma acc parallel copyin(a[2:4]) copyout(b[2:4]) + { + b[2] = a[2]; + b[3] = a[3]; + } + + for (i = 2; i < 4; i++) + { + if (a[i] != 2.0) + abort(); + + if (b[i] != 2.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 1.0; + } + +#pragma acc parallel copyin(a[0:4]) copyout(b[0:4]) + { + b[0] = a[0]; + b[1] = a[1]; + b[2] = a[2]; + b[3] = a[3]; + } + + for (i = 0; i < 4; i++) + { + if (a[i] != 3.0) + abort(); + + if (b[i] != 3.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + b[i] = 6.0; + } + +#pragma acc parallel copyin(a[0:4]) copyout(b[4:4]) + { + b[4] = a[0]; + b[5] = a[1]; + b[6] = a[2]; + b[7] = a[3]; + } + + for (i = 0; i < 4; i++) + { + if (a[i] != 9.0) + abort(); + } + + for (i = 4; i < 8; i++) + { + if (b[i] != 9.0) + abort(); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort(); + + if (acc_is_present (b, (N * sizeof (float)))) + abort(); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-1.c new file mode 100644 index 00000000000..fd9df33748f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-1.c @@ -0,0 +1,206 @@ +/* { dg-do run } */ + +#include + +int i; + +int main(void) +{ + int j, v; + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) copyin (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j != 1) 
+ abort (); +#else + if (v != 1 || i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) copyout (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) copy (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) create (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j != 1) + abort (); +#else + if (v != 1 || i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyin (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1) + abort (); +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j != 1) + abort (); +#else + if (v != 1 || i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyout (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copy (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1 || i != 2 || j != 1) + abort (); + + i = -1; + j = -2; + v = 0; +#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_create (i, j) + { + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + if (v != 1) + abort (); +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j 
!= 1) + abort (); +#else + if (v != 1 || i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; + v = 0; + +#pragma acc data copyin (i, j) + { +#pragma acc parallel /* copyout */ present_or_copyout (v) present (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + } +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j != 1) + abort (); +#else + if (v != 1 || i != -1 || j != -2) + abort (); +#endif + + i = -1; + j = -2; + v = 0; + +#pragma acc data copyin(i, j) + { +#pragma acc parallel /* copyout */ present_or_copyout (v) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } + } +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j != 1) + abort (); +#else + if (v != 1 || i != -1 || j != -2) + abort (); +#endif + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-empty.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-empty.c new file mode 100644 index 00000000000..8e3bb43f952 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-empty.c @@ -0,0 +1,6 @@ +int +main (void) +{ +#pragma acc parallel + ; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pointer-align-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pointer-align-1.c new file mode 100644 index 00000000000..f7d5b9bd6e8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pointer-align-1.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ + +/* PR middle-end/63247 */ + +#include + +int +main(int argc, char **argv) +{ +#define N 4 + short a[N]; + + a[0] = 10; + a[1] = 10; + a[2] = 10; + a[3] = 10; + +#pragma acc parallel copy(a[1:N-1]) + { + a[1] = 51; + a[2] = 52; + a[3] = 53; + } + + if (a[0] != 10) + abort (); + if (a[1] != 51) + abort (); + if (a[2] != 52) + abort (); + if (a[3] != 53) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/present-1.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/present-1.c new file mode 100644 index 00000000000..f331f1f6370 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/present-1.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + int N = 8; + float *a, *b, *c, *d; + int i; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + c = (float *) malloc (N * sizeof (float)); + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc data present (a[0:N]) present (c[0:N]) present (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + d = (float *) acc_deviceptr (c); + acc_unmap_data (c); + acc_free (d); + + free (a); + free (b); + free (c); + + return 0; +} +/* { dg-shouldfail "libgomp: present clause: !acc_is_present" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/present-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/present-2.c new file mode 100644 index 00000000000..41efa70e4ec --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/present-2.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include + +int +main (int argc, char **argv) +{ + int N = 8; + float *a, *b; + int i; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 0.0; + } + +#pragma acc data copyin(a[0:N]) copyout(b[0:N]) + { + +#pragma acc parallel present(a[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + b[ii] = a[ii]; + } + } + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } + + return 0; +} diff --git 
a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c new file mode 100644 index 00000000000..acf95402ba1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c @@ -0,0 +1,174 @@ +/* { dg-do run } */ + +/* Integer reductions. */ + +#include +#include + +#define vl 32 + +int +main(void) +{ + const int n = 1000; + int i; + int vresult, result, array[n]; + bool lvresult, lresult; + + for (i = 0; i < n; i++) + array[i] = i; + + result = 0; + vresult = 0; + + /* '+' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (+:result) + for (i = 0; i < n; i++) + result += array[i]; + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult += array[i]; + + if (result != vresult) + abort (); + + result = 0; + vresult = 0; + + /* '*' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (*:result) + for (i = 0; i < n; i++) + result *= array[i]; + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult *= array[i]; + + if (result != vresult) + abort (); + +// result = 0; +// vresult = 0; +// +// /* 'max' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result > array[i] ? result : array[i]; +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult > array[i] ? vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); +// +// result = 0; +// vresult = 0; +// +// /* 'min' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result < array[i] ? result : array[i]; +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult < array[i] ? 
vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); + + result = 0; + vresult = 0; + + /* '&' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&:result) + for (i = 0; i < n; i++) + result &= array[i]; + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult &= array[i]; + + if (result != vresult) + abort (); + + result = 0; + vresult = 0; + + /* '|' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (|:result) + for (i = 0; i < n; i++) + result |= array[i]; + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult |= array[i]; + + if (result != vresult) + abort (); + + result = 0; + vresult = 0; + + /* '^' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (^:result) + for (i = 0; i < n; i++) + result ^= array[i]; + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult ^= array[i]; + + if (result != vresult) + abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '&&' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&&:lresult) + for (i = 0; i < n; i++) + lresult = lresult && (result > array[i]); + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + lvresult = lresult && (result > array[i]); + + if (lresult != lvresult) + abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '||' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (||:lresult) + for (i = 0; i < n; i++) + lresult = lresult || (result > array[i]); + + /* Verify the reduction. 
*/ + for (i = 0; i < n; i++) + lvresult = lresult || (result > array[i]); + + if (lresult != lvresult) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c new file mode 100644 index 00000000000..c2ec110abd2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c @@ -0,0 +1,126 @@ +/* { dg-do run } */ + +/* float reductions. */ + +#include +#include +#include + +#define vl 32 + +int +main(void) +{ + const int n = 1000; + int i; + float vresult, result, array[n]; + bool lvresult, lresult; + + for (i = 0; i < n; i++) + array[i] = i; + + result = 0; + vresult = 0; + + /* '+' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (+:result) + for (i = 0; i < n; i++) + result += array[i]; + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult += array[i]; + + if (result != vresult) + abort (); + + result = 0; + vresult = 0; + + /* '*' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (*:result) + for (i = 0; i < n; i++) + result *= array[i]; + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult *= array[i]; + + if (fabs(result - vresult) > .0001) + abort (); +// result = 0; +// vresult = 0; +// +// /* 'max' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result > array[i] ? result : array[i]; +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult > array[i] ? vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); +// +// result = 0; +// vresult = 0; +// +// /* 'min' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result < array[i] ? result : array[i]; +// +// /* Verify the reduction. 
*/ +// for (i = 0; i < n; i++) +// vresult = vresult < array[i] ? vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '&&' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&&:lresult) + for (i = 0; i < n; i++) + lresult = lresult && (result > array[i]); + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + lvresult = lresult && (result > array[i]); + + if (lresult != lvresult) + abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '||' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (||:lresult) + for (i = 0; i < n; i++) + lresult = lresult || (result > array[i]); + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + lvresult = lresult || (result > array[i]); + + if (lresult != lvresult) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c new file mode 100644 index 00000000000..58b49ffa4be --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c @@ -0,0 +1,126 @@ +/* { dg-do run } */ + +/* double reductions. */ + +#include +#include +#include + +#define vl 32 + +int +main(void) +{ + const int n = 1000; + int i; + double vresult, result, array[n]; + bool lvresult, lresult; + + for (i = 0; i < n; i++) + array[i] = i; + + result = 0; + vresult = 0; + + /* '+' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (+:result) + for (i = 0; i < n; i++) + result += array[i]; + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult += array[i]; + + if (result != vresult) + abort (); + + result = 0; + vresult = 0; + + /* '*' reductions. 
*/ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (*:result) + for (i = 0; i < n; i++) + result *= array[i]; + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult *= array[i]; + + if (fabs(result - vresult) > .0001) + abort (); +// result = 0; +// vresult = 0; +// +// /* 'max' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result > array[i] ? result : array[i]; +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult > array[i] ? vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); +// +// result = 0; +// vresult = 0; +// +// /* 'min' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result < array[i] ? result : array[i]; +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult < array[i] ? vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '&&' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&&:lresult) + for (i = 0; i < n; i++) + lresult = lresult && (result > array[i]); + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + lvresult = lresult && (result > array[i]); + + if (lresult != lvresult) + abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '||' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (||:lresult) + for (i = 0; i < n; i++) + lresult = lresult || (result > array[i]); + + /* Verify the reduction. 
*/ + for (i = 0; i < n; i++) + lvresult = lresult || (result > array[i]); + + if (lresult != lvresult) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c new file mode 100644 index 00000000000..c8a9a6c0d36 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c @@ -0,0 +1,129 @@ +/* { dg-do run } */ + +/* complex reductions. */ + +#include +#include +#include +#include + +#define vl 32 + +int +main(void) +{ + const int n = 1000; + int i; + double complex vresult, result, array[n]; + bool lvresult, lresult; + + for (i = 0; i < n; i++) + array[i] = i; + + result = 0; + vresult = 0; + + /* '+' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (+:result) + for (i = 0; i < n; i++) + result += array[i]; + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + vresult += array[i]; + + if (result != vresult) + abort (); + + result = 0; + vresult = 0; + + /* Needs support for complex multiplication. */ + +// /* '*' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (*:result) +// for (i = 0; i < n; i++) +// result *= array[i]; +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult *= array[i]; +// +// if (fabs(result - vresult) > .0001) +// abort (); +// result = 0; +// vresult = 0; + +// /* 'max' reductions. */ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result > array[i] ? result : array[i]; +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult > array[i] ? vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); +// +// result = 0; +// vresult = 0; +// +// /* 'min' reductions. 
*/ +// #pragma acc parallel vector_length (vl) +// #pragma acc loop reduction (+:result) +// for (i = 0; i < n; i++) +// result = result < array[i] ? result : array[i]; +// +// /* Verify the reduction. */ +// for (i = 0; i < n; i++) +// vresult = vresult < array[i] ? vresult : array[i]; +// +// printf("%d != %d\n", result, vresult); +// if (result != vresult) +// abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '&&' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (&&:lresult) + for (i = 0; i < n; i++) + lresult = lresult && (creal(result) > creal(array[i])); + + /* Verify the reduction. */ + for (i = 0; i < n; i++) + lvresult = lresult && (creal(result) > creal(array[i])); + + if (lresult != lvresult) + abort (); + + result = 5; + vresult = 5; + + lresult = false; + lvresult = false; + + /* '||' reductions. */ +#pragma acc parallel vector_length (vl) +#pragma acc loop reduction (||:lresult) + for (i = 0; i < n; i++) + lresult = lresult || (creal(result) > creal(array[i])); + + /* Verify the reduction. 
*/ + for (i = 0; i < n; i++) + lvresult = lresult || (creal(result) > creal(array[i])); + + if (lresult != lvresult) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c new file mode 100644 index 00000000000..757b8bec245 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c @@ -0,0 +1,32 @@ +#include +#include + +int +main (void) +{ + int s1 = 2, s2 = 5, v1 = 2, v2 = 5; + int n = 100; + int i; + +#pragma acc parallel vector_length (1000) +#pragma acc loop reduction (+:s1, s2) + for (i = 0; i < n; i++) + { + s1 = s1 + 3; + s2 = s2 + 2; + } + + for (i = 0; i < n; i++) + { + v1 = v1 + 3; + v2 = v2 + 2; + } + + if (s1 != v1) + abort (); + + if (s2 != v2) + abort (); + + return 0; +} \ No newline at end of file diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-initial-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-initial-1.c new file mode 100644 index 00000000000..81cf865e61b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-initial-1.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ + +int +main(void) +{ +#define I 5 +#define N 11 +#define A 8 + + int a = A; + int s = I; + +#pragma acc parallel vector_length(N) + { + int i; +#pragma acc loop reduction(+:s) + for (i = 0; i < N; ++i) + s += a; + } + + if (s != I + N * A) + __builtin_abort(); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h new file mode 100644 index 00000000000..9db236c8362 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h @@ -0,0 +1,46 @@ + +#if ACC_DEVICE_TYPE_nvidia + +#pragma acc routine nohost +static int clock (void) +{ + int thetime; + + asm __volatile__ ("mov.u32 %0, %%clock;" : "=r"(thetime)); + + return thetime; +} + +#endif + +void +delay (unsigned long *d_o, unsigned long delay) +{ + int start, ticks; + + start = 
clock (); + + ticks = 0; + + while (ticks < delay) + ticks = clock () - start; + + return; +} + +void +delay2 (unsigned long *d_o, unsigned long delay, unsigned long tid) +{ + int start, ticks; + + start = clock (); + + ticks = 0; + + while (ticks < delay) + ticks = clock () - start; + + d_o[0] = tid; + + return; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx new file mode 100644 index 00000000000..6f748fcaf9e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx @@ -0,0 +1,148 @@ +// BEGIN PREAMBLE + .version 3.1 + .target sm_30 + .address_size 64 +// END PREAMBLE + +// BEGIN FUNCTION DEF: clock +.func (.param.u32 %out_retval)clock +{ +.reg.u32 %retval; + .reg.u64 %hr10; + .reg.u32 %r22; + .reg.u32 %r23; + .reg.u32 %r24; + .local.align 8 .b8 %frame[8]; + // #APP +// 7 "subr.c" 1 + mov.u32 %r24, %clock; +// 0 "" 2 + // #NO_APP + st.local.u32 [%frame], %r24; + ld.local.u32 %r22, [%frame]; + mov.u32 %r23, %r22; + mov.u32 %retval, %r23; + st.param.u32 [%out_retval], %retval; + ret; + } +// END FUNCTION DEF +// BEGIN GLOBAL FUNCTION DEF: delay +.visible .entry delay(.param.u64 %in_ar1, .param.u64 %in_ar2) +{ + .reg.u64 %ar1; + .reg.u64 %ar2; + .reg.u64 %hr10; + .reg.u64 %r22; + .reg.u32 %r23; + .reg.u64 %r24; + .reg.u64 %r25; + .reg.u32 %r26; + .reg.u32 %r27; + .reg.u32 %r28; + .reg.u32 %r29; + .reg.u32 %r30; + .reg.u64 %r31; + .reg.pred %r32; + .local.align 8 .b8 %frame[24]; + ld.param.u64 %ar1, [%in_ar1]; + ld.param.u64 %ar2, [%in_ar2]; + mov.u64 %r24, %ar1; + st.u64 [%frame+8], %r24; + mov.u64 %r25, %ar2; + st.local.u64 [%frame+16], %r25; + { + .param.u32 %retval_in; + { + call (%retval_in), clock; + } + ld.param.u32 %r26, [%retval_in]; +} + st.local.u32 [%frame+4], %r26; + mov.u32 %r27, 0; + st.local.u32 [%frame], %r27; + bra $L4; +$L5: + { + .param.u32 %retval_in; + { + call (%retval_in), clock; + } + ld.param.u32 %r28, [%retval_in]; +} + mov.u32 %r23, %r28; + 
ld.local.u32 %r30, [%frame+4]; + sub.u32 %r29, %r23, %r30; + st.local.u32 [%frame], %r29; +$L4: + ld.local.s32 %r22, [%frame]; + ld.local.u64 %r31, [%frame+16]; + setp.lo.u64 %r32,%r22,%r31; + @%r32 bra $L5; + ret; + } +// END FUNCTION DEF +// BEGIN GLOBAL FUNCTION DEF: delay2 +.visible .entry delay2(.param.u64 %in_ar1, .param.u64 %in_ar2, .param.u64 %in_ar3) +{ + .reg.u64 %ar1; + .reg.u64 %ar2; + .reg.u64 %ar3; + .reg.u64 %hr10; + .reg.u64 %r22; + .reg.u32 %r23; + .reg.u64 %r24; + .reg.u64 %r25; + .reg.u64 %r26; + .reg.u32 %r27; + .reg.u32 %r28; + .reg.u32 %r29; + .reg.u32 %r30; + .reg.u32 %r31; + .reg.u64 %r32; + .reg.pred %r33; + .reg.u64 %r34; + .reg.u64 %r35; + .local.align 8 .b8 %frame[32]; + ld.param.u64 %ar1, [%in_ar1]; + ld.param.u64 %ar2, [%in_ar2]; + ld.param.u64 %ar3, [%in_ar3]; + mov.u64 %r24, %ar1; + st.local.u64 [%frame+8], %r24; + mov.u64 %r25, %ar2; + st.local.u64 [%frame+16], %r25; + mov.u64 %r26, %ar3; + st.local.u64 [%frame+24], %r26; + { + .param.u32 %retval_in; + { + call (%retval_in), clock; + } + ld.param.u32 %r27, [%retval_in]; +} + st.local.u32 [%frame+4], %r27; + mov.u32 %r28, 0; + st.local.u32 [%frame], %r28; + bra $L8; +$L9: + { + .param.u32 %retval_in; + { + call (%retval_in), clock; + } + ld.param.u32 %r29, [%retval_in]; +} + mov.u32 %r23, %r29; + ld.local.u32 %r31, [%frame+4]; + sub.u32 %r30, %r23, %r31; + st.local.u32 [%frame], %r30; +$L8: + ld.local.s32 %r22, [%frame]; + ld.local.u64 %r32, [%frame+16]; + setp.lo.u64 %r33,%r22,%r32; + @%r33 bra $L9; + ld.local.u64 %r34, [%frame+8]; + ld.local.u64 %r35, [%frame+24]; + st.u64 [%r34], %r35; + ret; + } +// END FUNCTION DEF diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h new file mode 100644 index 00000000000..53749da5a0d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h @@ -0,0 +1,103 @@ + +#include +#include + +static int _Tnum_timers; +static CUevent *_Tstart_events, *_Tstop_events; +static 
CUstream _Tstream; + +void +init_timers (int ntimers) +{ + int i; + CUresult r; + + _Tnum_timers = ntimers; + + _Tstart_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent)); + _Tstop_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent)); + + r = cuStreamCreate (&_Tstream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + for (i = 0; i < _Tnum_timers; i++) + { + r = cuEventCreate (&_Tstart_events[i], CU_EVENT_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventCreate failed: %d\n", r); + abort (); + } + + r = cuEventCreate (&_Tstop_events[i], CU_EVENT_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventCreate failed: %d\n", r); + abort (); + } + } +} + +void +fini_timers (void) +{ + int i; + + for (i = 0; i < _Tnum_timers; i++) + { + cuEventDestroy (_Tstart_events[i]); + cuEventDestroy (_Tstop_events[i]); + } + + cuStreamDestroy (_Tstream); + + free (_Tstart_events); + free (_Tstop_events); +} + +void +start_timer (int timer) +{ + CUresult r; + + r = cuEventRecord (_Tstart_events[timer], _Tstream); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventRecord failed: %d\n", r); + abort (); + } +} + +float +stop_timer (int timer) +{ + CUresult r; + float etime; + + r = cuEventRecord (_Tstop_events[timer], _Tstream); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventRecord failed: %d\n", r); + abort (); + } + + r = cuEventSynchronize (_Tstop_events[timer]); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventSynchronize failed: %d\n", r); + abort (); + } + + r = cuEventElapsedTime (&etime, _Tstart_events[timer], _Tstop_events[timer]); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventElapsedTime failed: %d\n", r); + abort (); + } + + return etime; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c new file mode 100644 index 00000000000..c7e7257a873 --- 
/dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c @@ -0,0 +1,282 @@ +/* Copy of update-1.c with self exchanged with host for #pragma acc update. */ + +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + int N = 8; + float *a, *b, *c; + float *d_a, *d_b, *d_c; + int i; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + c = (float *) malloc (N * sizeof (float)); + + d_a = (float *) acc_malloc (N * sizeof (float)); + d_b = (float *) acc_malloc (N * sizeof (float)); + d_c = (float *) acc_malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + + acc_map_data (a, d_a, N * sizeof (float)); + acc_map_data (b, d_b, N * sizeof (float)); + acc_map_data (c, d_c, N * sizeof (float)); + +#pragma acc update device (a[0:N], b[0:N]) + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update self (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 1.0; + } + +#pragma acc update device (a[0:N], b[0:N]) + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update self (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 1.0; + } + +#pragma acc update device (a[0:N], b[0:N]) + +#pragma acc parallel present 
(a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc update device (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update self (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 6.0) + abort (); + + if (b[i] != 6.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 7.0; + b[i] = 2.0; + } + +#pragma acc update device (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update self (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 7.0) + abort (); + + if (b[i] != 7.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc update device (a[0:N]) + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update self (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 9.0) + abort (); + + if (b[i] != 9.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + } + +#pragma acc update device (a[0:N]) + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + } + +#pragma acc update device (a[0:N >> 1]) + 
+#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update self (a[0:N], b[0:N]) + + for (i = 0; i < (N >> 1); i++) + { + if (a[i] != 6.0) + abort (); + + if (b[i] != 6.0) + abort (); + } + + for (i = (N >> 1); i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1.c new file mode 100644 index 00000000000..dff139f03cc --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1.c @@ -0,0 +1,280 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + int N = 8; + float *a, *b, *c; + float *d_a, *d_b, *d_c; + int i; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + c = (float *) malloc (N * sizeof (float)); + + d_a = (float *) acc_malloc (N * sizeof (float)); + d_b = (float *) acc_malloc (N * sizeof (float)); + d_c = (float *) acc_malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + + acc_map_data (a, d_a, N * sizeof (float)); + acc_map_data (b, d_b, N * sizeof (float)); + acc_map_data (c, d_c, N * sizeof (float)); + +#pragma acc update device (a[0:N], b[0:N]) + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; 
i++) + { + a[i] = 5.0; + b[i] = 1.0; + } + +#pragma acc update device (a[0:N], b[0:N]) + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 1.0; + } + +#pragma acc update device (a[0:N], b[0:N]) + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update self (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc update device (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 6.0) + abort (); + + if (b[i] != 6.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 7.0; + b[i] = 2.0; + } + +#pragma acc update device (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 7.0) + abort (); + + if (b[i] != 7.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 
9.0; + } + +#pragma acc update device (a[0:N]) + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 9.0) + abort (); + + if (b[i] != 9.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + } + +#pragma acc update device (a[0:N]) + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + } + +#pragma acc update device (a[0:N >> 1]) + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N]) + + for (i = 0; i < (N >> 1); i++) + { + if (a[i] != 6.0) + abort (); + + if (b[i] != 6.0) + abort (); + } + + for (i = (N >> 1); i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/c.exp b/libgomp/testsuite/libgomp.oacc-c/c.exp new file mode 100644 index 00000000000..c0c70bbacad --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/c.exp @@ -0,0 +1,71 @@ +# This whole file adapted from libgomp.c/c.exp. + +if [info exists lang_library_path] then { + unset lang_library_path + unset lang_link_flags +} +if [info exists lang_test_file] then { + unset lang_test_file +} +if [info exists lang_include_flags] then { + unset lang_include_flags +} + +load_lib libgomp-dg.exp +load_gcc_lib gcc-dg.exp + +# If a testcase doesn't have special options, use these. +if ![info exists DEFAULT_CFLAGS] then { + set DEFAULT_CFLAGS "-O2" +} + +# Initialize dg. +dg-init + +# Turn on OpenACC. +lappend ALWAYS_CFLAGS "additional_flags=-fopenacc" + +# Gather a list of all tests. 
+set tests [lsort [concat \ + [find $srcdir/$subdir *.c] \ + [find $srcdir/$subdir/../libgomp.oacc-c-c++-common *.c]]] + +set ld_library_path $always_ld_library_path +append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST] +set_ld_library_path_env_vars + +# Test OpenACC with available accelerators. +set SAVE_ALWAYS_CFLAGS "$ALWAYS_CFLAGS" +foreach offload_target_openacc $offload_targets_s_openacc { + set ALWAYS_CFLAGS "$SAVE_ALWAYS_CFLAGS" + set tagopt "-DACC_DEVICE_TYPE_$offload_target_openacc=1" + + switch $offload_target_openacc { + host { + set acc_mem_shared 1 + } + host_nonshm { + set acc_mem_shared 0 + } + nvidia { + # Copy ptx file (TEMPORARY) + remote_download host $srcdir/libgomp.oacc-c-c++-common/subr.ptx + + # Where timer.h lives + lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/libgomp.oacc-c-c++-common" + + set acc_mem_shared 0 + } + default { + set acc_mem_shared 0 + } + } + set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared" + + setenv ACC_DEVICE_TYPE $offload_target_openacc + + dg-runtest $tests "$tagopt" $DEFAULT_CFLAGS +} + +# All done. +dg-finish diff --git a/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 new file mode 100644 index 00000000000..52b030bb6ce --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 @@ -0,0 +1,10 @@ +! { dg-shouldfail "" { *-*-* } { "*" } { "" } } + +program main + implicit none + + !$acc parallel + call abort + !$acc end parallel + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 new file mode 100644 index 00000000000..2ba2bcb8aa6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 @@ -0,0 +1,13 @@ +program main + implicit none + + integer :: argc + argc = command_argument_count () + + !$acc parallel copyin(argc) + if (argc .ne. 
0) then + call abort + end if + !$acc end parallel + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 new file mode 100644 index 00000000000..448881837ac --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 @@ -0,0 +1,52 @@ +! { dg-additional-options "-cpp" } +! TODO: Have to disable the acc_on_device builtin for we want to test the +! libgomp library function? The command line option +! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not for +! Fortran. + +use openacc +implicit none + +! Host. + +if (.not. acc_on_device (acc_device_none)) call abort +if (.not. acc_on_device (acc_device_host)) call abort +if (acc_on_device (acc_device_host_nonshm)) call abort +if (acc_on_device (acc_device_not_host)) call abort +if (acc_on_device (acc_device_nvidia)) call abort + + +! Host via offloading fallback mode. + +!$acc parallel if(.false.) +if (.not. acc_on_device (acc_device_none)) call abort +if (.not. acc_on_device (acc_device_host)) call abort +if (acc_on_device (acc_device_host_nonshm)) call abort +if (acc_on_device (acc_device_not_host)) call abort +if (acc_on_device (acc_device_nvidia)) call abort +!$acc end parallel + + +#if !ACC_DEVICE_TYPE_host + +! Offloaded. + +!$acc parallel +if (acc_on_device (acc_device_none)) call abort +if (acc_on_device (acc_device_host)) call abort +#if ACC_DEVICE_TYPE_host_nonshm +if (.not. acc_on_device (acc_device_host_nonshm)) call abort +#else +if (acc_on_device (acc_device_host_nonshm)) call abort +#endif +if (.not. acc_on_device (acc_device_not_host)) call abort +#if ACC_DEVICE_TYPE_nvidia +if (.not. 
acc_on_device (acc_device_nvidia)) call abort +#else +if (acc_on_device (acc_device_nvidia)) call abort +#endif +!$acc end parallel + +#endif + +end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f new file mode 100644 index 00000000000..0047a194f66 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f @@ -0,0 +1,52 @@ +! { dg-additional-options "-cpp" } +! TODO: Have to disable the acc_on_device builtin for we want to test +! the libgomp library function? The command line option +! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not +! for Fortran. + + USE OPENACC + IMPLICIT NONE + +!Host. + + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT + + +!Host via offloading fallback mode. + +!$ACC PARALLEL IF(.FALSE.) + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +!$ACC END PARALLEL + + +#if !ACC_DEVICE_TYPE_host + +! Offloaded. + +!$ACC PARALLEL + IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT +#if ACC_DEVICE_TYPE_host_nonshm + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT +#else + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT +#endif + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT +#if ACC_DEVICE_TYPE_nvidia + IF (.NOT. 
ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +#else + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +#endif +!$ACC END PARALLEL + +#endif + + END diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f new file mode 100644 index 00000000000..49d7a720fe3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f @@ -0,0 +1,52 @@ +! { dg-additional-options "-cpp" } +! TODO: Have to disable the acc_on_device builtin for we want to test +! the libgomp library function? The command line option +! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not +! for Fortran. + + IMPLICIT NONE + INCLUDE "openacc_lib.h" + +!Host. + + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT + + +!Host via offloading fallback mode. + +!$ACC PARALLEL IF(.FALSE.) + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +!$ACC END PARALLEL + + +#if !ACC_DEVICE_TYPE_host + +! Offloaded. + +!$ACC PARALLEL + IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT +#if ACC_DEVICE_TYPE_host_nonshm + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT +#else + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT +#endif + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT +#if ACC_DEVICE_TYPE_nvidia + IF (.NOT. 
ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +#else + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +#endif +!$ACC END PARALLEL + +#endif + + END diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90 new file mode 100644 index 00000000000..b6e637ba43f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90 @@ -0,0 +1,135 @@ +! { dg-do run } + +program asyncwait + integer, parameter :: N = 64 + real, allocatable :: a(:), b(:), c(:), d(:), e(:) + integer i + + allocate (a(N)) + allocate (b(N)) + allocate (c(N)) + allocate (d(N)) + allocate (e(N)) + + a(:) = 3.0 + b(:) = 0.0 + + !$acc data copy (a(1:N)) copy (b(1:N)) + + !$acc parallel async + !$acc loop + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel + + !$acc wait + !$acc end data + + do i = 1, N + if (a(i) .ne. 3.0) call abort + if (b(i) .ne. 3.0) call abort + end do + + a(:) = 2.0 + b(:) = 0.0 + + !$acc data copy (a(1:N)) copy (b(1:N)) + + !$acc parallel async (1) + !$acc loop + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel + + !$acc wait (1) + !$acc end data + + do i = 1, N + if (a(i) .ne. 2.0) call abort + if (b(i) .ne. 2.0) call abort + end do + + a(:) = 3.0 + b(:) = 0.0 + c(:) = 0.0 + d(:) = 0.0 + + !$acc data copy (a(1:N)) copy (b(1:N)) copy (c(1:N)) copy (d(1:N)) + + !$acc parallel async (1) + do i = 1, N + b(i) = (a(i) * a(i) * a(i)) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + do i = 1, N + c(i) = (a(i) * 4) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + !$acc loop + do i = 1, N + d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i) + end do + !$acc end parallel + + !$acc wait (1) + !$acc end data + + do i = 1, N + if (a(i) .ne. 3.0) call abort + if (b(i) .ne. 9.0) call abort + if (c(i) .ne. 4.0) call abort + if (d(i) .ne. 
1.0) call abort + end do + + a(:) = 2.0 + b(:) = 0.0 + c(:) = 0.0 + d(:) = 0.0 + e(:) = 0.0 + + !$acc data copy (a(1:N), b(1:N), c(1:N), d(1:N), e(1:N)) + + !$acc parallel async (1) + do i = 1, N + b(i) = (a(i) * a(i) * a(i)) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + !$acc loop + do i = 1, N + c(i) = (a(i) * 4) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + !$acc loop + do i = 1, N + d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i) + end do + !$acc end parallel + + !$acc parallel wait (1) async (1) + !$acc loop + do i = 1, N + e(i) = a(i) + b(i) + c(i) + d(i) + end do + !$acc end parallel + + !$acc wait (1) + !$acc end data + + do i = 1, N + if (a(i) .ne. 2.0) call abort + if (b(i) .ne. 4.0) call abort + if (c(i) .ne. 4.0) call abort + if (d(i) .ne. 1.0) call abort + if (e(i) .ne. 11.0) call abort + end do +end program asyncwait diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90 new file mode 100644 index 00000000000..bade52bdd7d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90 @@ -0,0 +1,40 @@ +! { dg-do run } + +program parallel_wait + integer, parameter :: N = 64 + real, allocatable :: a(:), b(:), c(:) + integer i + + allocate (a(N)) + allocate (b(N)) + allocate (c(N)) + + !$acc parallel async (0) + !$acc loop + do i = 1, N + a(i) = 1 + end do + !$acc end parallel + + !$acc parallel async (1) + !$acc loop + do i = 1, N + b(i) = 1 + end do + !$acc end parallel + + !$acc parallel wait (0, 1) + !$acc loop + do i = 1, N + c(i) = a(i) + b(i) + end do + !$acc end parallel + + do i = 1, N + if (c(i) .ne. 
2.0) call abort + end do + + deallocate (a) + deallocate (b) + deallocate (c) +end program parallel_wait diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90 new file mode 100644 index 00000000000..d48dc11bfd6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90 @@ -0,0 +1,42 @@ +! { dg-do run } + +program parallel_wait + integer, parameter :: N = 64 + real, allocatable :: a(:), b(:), c(:) + integer i + + allocate (a(N)) + allocate (b(N)) + allocate (c(N)) + + !$acc parallel async (0) + !$acc loop + do i = 1, N + a(i) = 1 + end do + !$acc end parallel + + !$acc parallel async (1) + !$acc loop + do i = 1, N + b(i) = 1 + end do + !$acc end parallel + + !$acc wait (0, 1) + + !$acc parallel + !$acc loop + do i = 1, N + c(i) = a(i) + b(i) + end do + !$acc end parallel + + do i = 1, N + if (c(i) .ne. 2.0) call abort + end do + + deallocate (a) + deallocate (b) + deallocate (c) +end program parallel_wait diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-1.f90 new file mode 100644 index 00000000000..4c07bc2108e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-1.f90 @@ -0,0 +1,27 @@ +! { dg-do run } + +program collapse1 + integer :: i, j, k, a(1:3, 4:6, 5:7) + logical :: l + l = .false. + a(:, :, :) = 0 + !$acc parallel + !$acc loop collapse(4 - 1) + do i = 1, 3 + do j = 4, 6 + do k = 5, 7 + a(i, j, k) = i + j + k + end do + end do + end do + !$acc loop collapse(2) reduction(.or.:l) + do i = 1, 3 + do j = 4, 6 + do k = 5, 7 + if (a(i, j, k) .ne. (i + j + k)) l = .true. 
+ end do + end do + end do + !$acc end parallel + if (l) call abort +end program collapse1 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-2.f90 new file mode 100644 index 00000000000..ca3b638a461 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-2.f90 @@ -0,0 +1,25 @@ +! { dg-do run } + +program collapse2 + integer :: i, j, k, a(1:3, 4:6, 5:7) + logical :: l + l = .false. + a(:, :, :) = 0 + !$acc parallel + !$acc loop collapse(4 - 1) + do 164 i = 1, 3 + do 164 j = 4, 6 + do 164 k = 5, 7 + a(i, j, k) = i + j + k +164 end do + !$acc loop collapse(2) reduction(.or.:l) +firstdo: do i = 1, 3 + do j = 4, 6 + do k = 5, 7 + if (a(i, j, k) .ne. (i + j + k)) l = .true. + end do + end do + end do firstdo + !$acc end parallel + if (l) call abort +end program collapse2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-3.f90 new file mode 100644 index 00000000000..50e6100a1a0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-3.f90 @@ -0,0 +1,28 @@ +! { dg-do run } + +program collapse3 + integer :: a(3,3,3), k, kk, kkk, l, ll, lll + !$acc parallel + !$acc loop collapse(3) + do 115 k=1,3 +dokk: do kk=1,3 + do kkk=1,3 + a(k,kk,kkk) = 1 + enddo + enddo dokk +115 continue + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.1)) call abort + + !$acc parallel + !$acc loop collapse(3) +dol: do 120 l=1,3 +doll: do ll=1,3 + do lll=1,3 + a(l,ll,lll) = 2 + enddo + enddo doll +120 end do dol + !$acc end parallel + if (any(a(1:3,1:3,1:3).ne.2)) call abort +end program collapse3 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-4.f90 new file mode 100644 index 00000000000..41b66db43ec --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-4.f90 @@ -0,0 +1,40 @@ +! { dg-do run } + +! 
collapse3.f90:test1 +program collapse4 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel + !$acc loop collapse (3) reduction (.or.:l) + do i = 2, 6 + do j = -2, 4 + do k = 13, 18 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = 2, 6 + do j = -2, 4 + do k = 13, 18 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = 2, 6 + do j = -2, 4 + do k = 13, 18 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do +end program collapse4 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-5.f90 new file mode 100644 index 00000000000..8c20f043f4c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-5.f90 @@ -0,0 +1,48 @@ +! { dg-do run } + +! collapse3.f90:test2 +program collapse5 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + v1 = 3 + v2 = 6 + v3 = -2 + v4 = 4 + v5 = 13 + v6 = 18 + !$acc parallel + !$acc loop collapse (3) reduction (.or.:l) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + m = i * 100 + j * 10 + k + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.l) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. 
b(i, j, k)) call abort + end do + end do + end do +end program collapse5 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-6.f90 new file mode 100644 index 00000000000..7404b9182ac --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-6.f90 @@ -0,0 +1,50 @@ +! { dg-do run } + +! collapse3.f90:test3 +program collapse6 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + v1 = 3 + v2 = 6 + v3 = -2 + v4 = 4 + v5 = 13 + v6 = 18 + v7 = 1 + v8 = 1 + v9 = 1 + !$acc parallel + !$acc loop collapse (3) reduction (.or.:l) + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2, v7 + do j = v3, v4, v8 + do k = v5, v6, v9 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do +end program collapse6 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-7.f90 new file mode 100644 index 00000000000..12efd8c0ab4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-7.f90 @@ -0,0 +1,40 @@ +! { dg-do run } + +! collapse3.f90:test4 +program collapse7 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + logical :: l, r + l = .false. + r = .false. 
+ a(:, :, :) = 0 + b(:, :, :) = 0 + !$acc parallel + !$acc loop collapse (3) reduction (.or.:l) + do i = 2, 6 + do j = -2, 4 + do k = 13, 18 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = 2, 6 + do j = -2, 4 + do k = 13, 18 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = 1, 7 + do j = -3, 5 + do k = 12, 19 + if (a(i, j, k) .ne. b(i, j, k)) call abort + end do + end do + end do +end program collapse7 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-8.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-8.f90 new file mode 100644 index 00000000000..04fbcfef49a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-8.f90 @@ -0,0 +1,47 @@ +! { dg-do run } + +! collapse3.f90:test5 +program collapse8 + integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19) + integer :: v1, v2, v3, v4, v5, v6 + logical :: l, r + l = .false. + r = .false. + a(:, :, :) = 0 + b(:, :, :) = 0 + v1 = 3 + v2 = 6 + v3 = -2 + v4 = 4 + v5 = 13 + v6 = 18 + !$acc parallel + !$acc loop collapse (3) reduction (.or.:l) + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + l = l.or.k.lt.13.or.k.gt.18 + if (.not.l) a(i, j, k) = a(i, j, k) + 1 + end do + end do + end do + !$acc end parallel + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4 + r = r.or.k.lt.13.or.k.gt.18 + if (.not.r) b(i, j, k) = b(i, j, k) + 1 + end do + end do + end do + if (l .neqv. r) call abort + do i = v1, v2 + do j = v3, v4 + do k = v5, v6 + if (a(i, j, k) .ne. 
b(i, j, k)) call abort + end do + end do + end do +end program collapse8 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-1.f90 new file mode 100644 index 00000000000..5e94e2d7f47 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-1.f90 @@ -0,0 +1,45 @@ +! { dg-do run } + +program test + integer, parameter :: N = 8 + real, allocatable :: a(:), b(:) + + allocate (a(N)) + allocate (b(N)) + + a(:) = 3.0 + b(:) = 0.0 + + !$acc enter data copyin (a(1:N), b(1:N)) + + !$acc parallel + do i = 1, n + b(i) = a (i) + end do + !$acc end parallel + + !$acc exit data copyout (a(1:N), b(1:N)) + + do i = 1, n + if (a(i) .ne. 3.0) call abort + if (b(i) .ne. 3.0) call abort + end do + + a(:) = 5.0 + b(:) = 1.0 + + !$acc enter data copyin (a(1:N), b(1:N)) + + !$acc parallel + do i = 1, n + b(i) = a (i) + end do + !$acc end parallel + + !$acc exit data copyout (a(1:N), b(1:N)) + + do i = 1, n + if (a(i) .ne. 5.0) call abort + if (b(i) .ne. 5.0) call abort + end do +end program test diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 new file mode 100644 index 00000000000..8736c2a1f10 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 @@ -0,0 +1,31 @@ +! { dg-do run } + +program test + integer, parameter :: N = 8 + real, allocatable :: a(:,:), b(:,:) + + allocate (a(N,N)) + allocate (b(N,N)) + + a(:,:) = 3.0 + b(:,:) = 0.0 + + !$acc enter data copyin (a(1:N,1:N), b(1:N,1:N)) + + !$acc parallel + do i = 1, n + do j = 1, n + b(j,i) = a (j,i) + end do + end do + !$acc end parallel + + !$acc exit data copyout (a(1:N,1:N), b(1:N,1:N)) + + do i = 1, n + do j = 1, n + if (a(j,i) .ne. 3.0) call abort + if (b(j,i) .ne. 
3.0) call abort + end do + end do +end program test diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 new file mode 100644 index 00000000000..9868cb0da9d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 @@ -0,0 +1,131 @@ +! { dg-do run } + +program asyncwait + real, allocatable :: a(:), b(:), c(:), d(:), e(:) + integer i, N + + N = 64 + + allocate (a(N)) + allocate (b(N)) + allocate (c(N)) + allocate (d(N)) + allocate (e(N)) + + a(:) = 3.0 + b(:) = 0.0 + + !$acc enter data copyin (a(1:N)) copyin (b(1:N)) copyin (N) async + + !$acc parallel async wait + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel + + !$acc wait + !$acc exit data copyout (a(1:N)) copyout (b(1:N)) + + do i = 1, N + if (a(i) .ne. 3.0) call abort + if (b(i) .ne. 3.0) call abort + end do + + a(:) = 2.0 + b(:) = 0.0 + + !$acc enter data copyin (a(1:N)) copyin (b(1:N)) async (1) + + !$acc parallel async (1) wait (1) + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel + + !$acc wait (1) + !$acc exit data copyout (a(1:N)) copyout (b(1:N)) + + do i = 1, N + if (a(i) .ne. 2.0) call abort + if (b(i) .ne. 2.0) call abort + end do + + a(:) = 3.0 + b(:) = 0.0 + c(:) = 0.0 + d(:) = 0.0 + + !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) create (d(1:N)) + + !$acc parallel async (1) + do i = 1, N + b(i) = (a(i) * a(i) * a(i)) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + do i = 1, N + c(i) = (a(i) * 4) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + do i = 1, N + d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i) + end do + !$acc end parallel + + !$acc wait (1) + !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) copyout (d(1:N)) + + do i = 1, N + if (a(i) .ne. 3.0) call abort + if (b(i) .ne. 9.0) call abort + if (c(i) .ne. 4.0) call abort + if (d(i) .ne. 
1.0) call abort + end do + + a(:) = 2.0 + b(:) = 0.0 + c(:) = 0.0 + d(:) = 0.0 + e(:) = 0.0 + + !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) create (d(1:N)) copyin (e(1:N)) + + !$acc parallel async (1) + do i = 1, N + b(i) = (a(i) * a(i) * a(i)) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + do i = 1, N + c(i) = (a(i) * 4) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + do i = 1, N + d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i) + end do + !$acc end parallel + + !$acc parallel wait (1) async (1) + do i = 1, N + e(i) = a(i) + b(i) + c(i) + d(i) + end do + !$acc end parallel + + !$acc wait (1) + !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) copyout (d(1:N)) copyout (e(1:N)) + !$acc exit data delete (N) + + do i = 1, N + if (a(i) .ne. 2.0) call abort + if (b(i) .ne. 4.0) call abort + if (c(i) .ne. 4.0) call abort + if (d(i) .ne. 1.0) call abort + if (e(i) .ne. 11.0) call abort + end do +end program asyncwait diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-4-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-4-2.f90 new file mode 100644 index 00000000000..16a85980b4d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-4-2.f90 @@ -0,0 +1,138 @@ +! Copy of data-4.f90 with self exchanged with host for !acc update. + +! { dg-do run } + +program asyncwait + real, allocatable :: a(:), b(:), c(:), d(:), e(:) + integer i, N + + N = 64 + + allocate (a(N)) + allocate (b(N)) + allocate (c(N)) + allocate (d(N)) + allocate (e(N)) + + a(:) = 3.0 + b(:) = 0.0 + + !$acc enter data copyin (a(1:N)) copyin (b(1:N)) copyin (N) async + + !$acc parallel async wait + !$acc loop + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel + + !$acc update self (a(1:N), b(1:N)) async wait + !$acc wait + + do i = 1, N + if (a(i) .ne. 3.0) call abort + if (b(i) .ne. 
3.0) call abort + end do + + a(:) = 2.0 + b(:) = 0.0 + + !$acc update device (a(1:N), b(1:N)) async (1) + + !$acc parallel async (1) wait (1) + !$acc loop + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel + + !$acc update host (a(1:N), b(1:N)) async (1) wait (1) + !$acc wait (1) + + do i = 1, N + if (a(i) .ne. 2.0) call abort + if (b(i) .ne. 2.0) call abort + end do + + a(:) = 3.0 + b(:) = 0.0 + c(:) = 0.0 + d(:) = 0.0 + + !$acc enter data copyin (c(1:N), d(1:N)) async (1) + !$acc update device (a(1:N), b(1:N)) async (1) + + !$acc parallel async (1) + do i = 1, N + b(i) = (a(i) * a(i) * a(i)) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + do i = 1, N + c(i) = (a(i) * 4) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + do i = 1, N + d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i) + end do + !$acc end parallel + + !$acc update self (a(1:N), b(1:N), c(1:N), d(1:N)) async (1) wait (1) + + !$acc wait (1) + + do i = 1, N + if (a(i) .ne. 3.0) call abort + if (b(i) .ne. 9.0) call abort + if (c(i) .ne. 4.0) call abort + if (d(i) .ne. 1.0) call abort + end do + + a(:) = 2.0 + b(:) = 0.0 + c(:) = 0.0 + d(:) = 0.0 + e(:) = 0.0 + + !$acc enter data copyin (e(1:N)) async (1) + !$acc update device (a(1:N), b(1:N), c(1:N), d(1:N)) async (1) + + !$acc parallel async (1) + do i = 1, N + b(i) = (a(i) * a(i) * a(i)) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + do i = 1, N + c(i) = (a(i) * 4) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + do i = 1, N + d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i) + end do + !$acc end parallel + + !$acc parallel wait (1) async (1) + do i = 1, N + e(i) = a(i) + b(i) + c(i) + d(i) + end do + !$acc end parallel + + !$acc update self (a(1:N), b(1:N), c(1:N), d(1:N), e(1:N)) async (1) wait (1) + !$acc wait (1) + !$acc exit data delete (N, a(1:N), b(1:N), c(1:N), d(1:N), e(1:N)) + + do i = 1, N + if (a(i) .ne. 2.0) call abort + if (b(i) .ne. 4.0) call abort + if (c(i) .ne. 
4.0) call abort + if (d(i) .ne. 1.0) call abort + if (e(i) .ne. 11.0) call abort + end do +end program asyncwait diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-4.f90 new file mode 100644 index 00000000000..f6886b0e1e3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-4.f90 @@ -0,0 +1,136 @@ +! { dg-do run } + +program asyncwait + real, allocatable :: a(:), b(:), c(:), d(:), e(:) + integer i, N + + N = 64 + + allocate (a(N)) + allocate (b(N)) + allocate (c(N)) + allocate (d(N)) + allocate (e(N)) + + a(:) = 3.0 + b(:) = 0.0 + + !$acc enter data copyin (a(1:N)) copyin (b(1:N)) copyin (N) async + + !$acc parallel async wait + !$acc loop + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel + + !$acc update host (a(1:N), b(1:N)) async wait + !$acc wait + + do i = 1, N + if (a(i) .ne. 3.0) call abort + if (b(i) .ne. 3.0) call abort + end do + + a(:) = 2.0 + b(:) = 0.0 + + !$acc update device (a(1:N), b(1:N)) async (1) + + !$acc parallel async (1) wait (1) + !$acc loop + do i = 1, N + b(i) = a(i) + end do + !$acc end parallel + + !$acc update self (a(1:N), b(1:N)) async (1) wait (1) + !$acc wait (1) + + do i = 1, N + if (a(i) .ne. 2.0) call abort + if (b(i) .ne. 2.0) call abort + end do + + a(:) = 3.0 + b(:) = 0.0 + c(:) = 0.0 + d(:) = 0.0 + + !$acc enter data copyin (c(1:N), d(1:N)) async (1) + !$acc update device (a(1:N), b(1:N)) async (1) + + !$acc parallel async (1) + do i = 1, N + b(i) = (a(i) * a(i) * a(i)) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + do i = 1, N + c(i) = (a(i) * 4) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + do i = 1, N + d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i) + end do + !$acc end parallel + + !$acc update host (a(1:N), b(1:N), c(1:N), d(1:N)) async (1) wait (1) + + !$acc wait (1) + + do i = 1, N + if (a(i) .ne. 3.0) call abort + if (b(i) .ne. 9.0) call abort + if (c(i) .ne. 4.0) call abort + if (d(i) .ne. 
1.0) call abort + end do + + a(:) = 2.0 + b(:) = 0.0 + c(:) = 0.0 + d(:) = 0.0 + e(:) = 0.0 + + !$acc enter data copyin (e(1:N)) async (1) + !$acc update device (a(1:N), b(1:N), c(1:N), d(1:N)) async (1) + + !$acc parallel async (1) + do i = 1, N + b(i) = (a(i) * a(i) * a(i)) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + do i = 1, N + c(i) = (a(i) * 4) / a(i) + end do + !$acc end parallel + + !$acc parallel async (1) + do i = 1, N + d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i) + end do + !$acc end parallel + + !$acc parallel wait (1) async (1) + do i = 1, N + e(i) = a(i) + b(i) + c(i) + d(i) + end do + !$acc end parallel + + !$acc update host (a(1:N), b(1:N), c(1:N), d(1:N), e(1:N)) async (1) wait (1) + !$acc wait (1) + !$acc exit data delete (N, a(1:N), b(1:N), c(1:N), d(1:N), e(1:N)) + + do i = 1, N + if (a(i) .ne. 2.0) call abort + if (b(i) .ne. 4.0) call abort + if (c(i) .ne. 4.0) call abort + if (d(i) .ne. 1.0) call abort + if (e(i) .ne. 11.0) call abort + end do +end program asyncwait diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-1.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-1.f new file mode 100644 index 00000000000..ac220ab4c7e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-1.f @@ -0,0 +1,17 @@ +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + + IMPLICIT NONE + INCLUDE "openacc_lib.h" + + INTEGER I + + CALL ACC_COPYIN (I) + +!$ACC DATA COPY (I) + I = 0 +!$ACC END DATA + + END + +! { dg-shouldfail "" } +! { dg-output "Trying to map into device .* object when .* is already mapped" } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-2.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-2.f new file mode 100644 index 00000000000..2c5254b8684 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-2.f @@ -0,0 +1,16 @@ +! 
{ dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + + IMPLICIT NONE + + INTEGER I + +!$ACC DATA PRESENT_OR_COPY (I) +!$ACC DATA COPYOUT (I) + I = 0 +!$ACC END DATA +!$ACC END DATA + + END + +! { dg-shouldfail "" } +! { dg-output "Trying to map into device .* object when .* is already mapped" } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-3.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-3.f new file mode 100644 index 00000000000..c41de28f539 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-3.f @@ -0,0 +1,15 @@ +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + + IMPLICIT NONE + INCLUDE "openacc_lib.h" + + INTEGER I + +!$ACC DATA PRESENT_OR_COPY (I) + CALL ACC_COPYIN (I) +!$ACC END DATA + + END + +! { dg-shouldfail "" } +! { dg-output "already mapped to" } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-4.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-4.f new file mode 100644 index 00000000000..f54bf580ce9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-4.f @@ -0,0 +1,14 @@ +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + + IMPLICIT NONE + INCLUDE "openacc_lib.h" + + INTEGER I + + CALL ACC_PRESENT_OR_COPYIN (I) + CALL ACC_COPYIN (I) + + END + +! { dg-shouldfail "" } +! { dg-output "already mapped to" } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-5.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-5.f new file mode 100644 index 00000000000..9a3e94fac7a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-5.f @@ -0,0 +1,14 @@ +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + + IMPLICIT NONE + INCLUDE "openacc_lib.h" + + INTEGER I + +!$ACC ENTER DATA CREATE (I) + CALL ACC_COPYIN (I) + + END + +! { dg-shouldfail "" } +! 
{ dg-output "already mapped to" } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-6.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-6.f new file mode 100644 index 00000000000..eaf5d982492 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-6.f @@ -0,0 +1,14 @@ +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + + IMPLICIT NONE + INCLUDE "openacc_lib.h" + + INTEGER I + + CALL ACC_PRESENT_OR_COPYIN (I) +!$ACC ENTER DATA CREATE (I) + + END + +! { dg-shouldfail "" } +! { dg-output "already mapped to" } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-7.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-7.f new file mode 100644 index 00000000000..d96bf0b2f84 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-7.f @@ -0,0 +1,14 @@ +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + + IMPLICIT NONE + INCLUDE "openacc_lib.h" + + INTEGER I + +!$ACC ENTER DATA CREATE (I) + CALL ACC_CREATE (I) + + END + +! { dg-shouldfail "" } +! { dg-output "already mapped to" } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-8.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-8.f new file mode 100644 index 00000000000..16da048861a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-8.f @@ -0,0 +1,16 @@ +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + + IMPLICIT NONE + + INTEGER I + +!$ACC DATA CREATE (I) +!$ACC PARALLEL COPYIN (I) + I = 0 +!$ACC END PARALLEL +!$ACC END DATA + + END + +! { dg-shouldfail "" } +! { dg-output "Trying to map into device .* object when .* is already mapped" } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp new file mode 100644 index 00000000000..a8f62e84d67 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp @@ -0,0 +1,98 @@ +# This whole file adapted from libgomp.fortran/fortran.exp. 
+ +load_lib libgomp-dg.exp +load_gcc_lib gcc-dg.exp +load_gcc_lib gfortran-dg.exp + +global shlib_ext +global ALWAYS_CFLAGS + +set shlib_ext [get_shlib_extension] +set lang_library_path "../libgfortran/.libs" +set lang_link_flags "-lgfortran" +if [info exists lang_include_flags] then { + unset lang_include_flags +} +set lang_test_file_found 0 +set quadmath_library_path "../libquadmath/.libs" + + +# Initialize dg. +dg-init + +# Turn on OpenACC. +lappend ALWAYS_CFLAGS "additional_flags=-fopenacc" + +if { $blddir != "" } { + set lang_source_re {^.*\.[fF](|90|95|03|08)$} + set lang_include_flags "-fintrinsic-modules-path=${blddir}" + # Look for a static libgfortran first. + if [file exists "${blddir}/${lang_library_path}/libgfortran.a"] { + set lang_test_file "${lang_library_path}/libgfortran.a" + set lang_test_file_found 1 + # We may have a shared only build, so look for a shared libgfortran. + } elseif [file exists "${blddir}/${lang_library_path}/libgfortran.${shlib_ext}"] { + set lang_test_file "${lang_library_path}/libgfortran.${shlib_ext}" + set lang_test_file_found 1 + } else { + puts "No libgfortran library found, will not execute fortran tests" + } +} elseif [info exists GFORTRAN_UNDER_TEST] { + set lang_test_file_found 1 + # Needs to exist for libgomp.exp. + set lang_test_file "" +} else { + puts "GFORTRAN_UNDER_TEST not defined, will not execute fortran tests" +} + +if { $lang_test_file_found } { + # Gather a list of all tests. + set tests [lsort [find $srcdir/$subdir *.\[fF\]{,90,95,03,08}]] + + if { $blddir != "" } { + if { [file exists "${blddir}/${quadmath_library_path}/libquadmath.a"] + || [file exists "${blddir}/${quadmath_library_path}/libquadmath.${shlib_ext}"] } { + lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/${quadmath_library_path}/" + # Allow for spec subsitution. 
+ lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/${quadmath_library_path}/" + set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}:${blddir}/${quadmath_library_path}" + } else { + set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}" + } + } else { + set ld_library_path "$always_ld_library_path" + } + append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST] + set_ld_library_path_env_vars + + # Test OpenACC with available accelerators. + foreach offload_target_openacc $offload_targets_s_openacc { + set tagopt "-DACC_DEVICE_TYPE_$offload_target_openacc=1" + + switch $offload_target_openacc { + host { + set acc_mem_shared 1 + } + host_nonshm { + set acc_mem_shared 0 + } + nvidia { + set acc_mem_shared 0 + } + default { + set acc_mem_shared 0 + } + } + set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared" + + setenv ACC_DEVICE_TYPE $offload_target_openacc + + # For Fortran we're doing torture testing, as Fortran has far more tests + # with arrays etc. that testing just -O0 or -O2 is insufficient, that is + # typically not the case for C/C++. + gfortran-dg-runtest $tests "$tagopt" "" + } +} + +# All done. +dg-finish diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 new file mode 100644 index 00000000000..51dc452764b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 @@ -0,0 +1,13 @@ +use openacc + +if (acc_get_num_devices (acc_device_host) .ne. 1) call abort +call acc_set_device_type (acc_device_host) +if (acc_get_device_type () .ne. acc_device_host) call abort +call acc_set_device_num (0, acc_device_host) +if (acc_get_device_num (acc_device_host) .ne. 
0) call abort +call acc_shutdown (acc_device_host) + +call acc_init (acc_device_host) +call acc_shutdown (acc_device_host) + +end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 new file mode 100644 index 00000000000..a54d6a7d92e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 @@ -0,0 +1,82 @@ +! { dg-do run } + +program main + implicit none + include "openacc_lib.h" + + integer, target :: a_3d_i(10, 10, 10) + complex a_3d_c(10, 10, 10) + real a_3d_r(10, 10, 10) + + integer i, j, k + complex c + real r + integer, parameter :: i_size = sizeof (i) + integer, parameter :: c_size = sizeof (c) + integer, parameter :: r_size = sizeof (r) + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r) + + call acc_copyin (a_3d_i) + call acc_copyin (a_3d_c) + call acc_copyin (a_3d_r) + + if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort + + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c(i, j, k), i_size) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_r(i, j, k), i_size) .neqv. .TRUE.) 
call abort + end do + end do + end do + + call acc_shutdown (acc_device_nvidia) + +contains + + subroutine set3d (clear, a_i, a_c, a_r) + logical clear + integer, dimension (:,:,:), intent (inout) :: a_i + complex, dimension (:,:,:), intent (inout) :: a_c + real, dimension (:,:,:), intent (inout) :: a_r + + integer i, j, k + integer lb1, ub1, lb2, ub2, lb3, ub3 + + lb1 = lbound (a_i, 1) + ub1 = ubound (a_i, 1) + + lb2 = lbound (a_i, 2) + ub2 = ubound (a_i, 2) + + lb3 = lbound (a_i, 3) + ub3 = ubound (a_i, 3) + + do i = lb1, ub1 + do j = lb2, ub2 + do k = lb3, ub3 + if (clear) then + a_i(i, j, k) = 0 + a_c(i, j, k) = cmplx (0.0, 0.0) + a_r(i, j, k) = 0.0 + else + a_i(i, j, k) = i + a_c(i, j, k) = cmplx (i, j) + a_r(i, j, k) = i + end if + end do + end do + end do + + end subroutine + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f b/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f new file mode 100644 index 00000000000..a9d70b2489b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f @@ -0,0 +1,13 @@ + USE OPENACC + + IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT + CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST) + IF (ACC_GET_DEVICE_TYPE () .NE. ACC_DEVICE_HOST) CALL ABORT + CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST) + IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT + CALL ACC_SHUTDOWN (ACC_DEVICE_HOST) + + CALL ACC_INIT (ACC_DEVICE_HOST) + CALL ACC_SHUTDOWN (ACC_DEVICE_HOST) + + END diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f b/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f new file mode 100644 index 00000000000..56d2cd2b961 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f @@ -0,0 +1,13 @@ + INCLUDE "openacc_lib.h" + + IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT + CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST) + IF (ACC_GET_DEVICE_TYPE () .NE. 
ACC_DEVICE_HOST) CALL ABORT + CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST) + IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT + CALL ACC_SHUTDOWN (ACC_DEVICE_HOST) + + CALL ACC_INIT (ACC_DEVICE_HOST) + CALL ACC_SHUTDOWN (ACC_DEVICE_HOST) + + END diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 new file mode 100644 index 00000000000..3a2b661b5b7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 @@ -0,0 +1,35 @@ +! { dg-do run } + +program main + use openacc + implicit none + + integer n + + if (acc_get_num_devices (acc_device_host) .ne. 1) call abort + + if (acc_get_num_devices (acc_device_none) .ne. 0) call abort + + call acc_init (acc_device_host) + + if (acc_get_device_type () .ne. acc_device_host) call abort + + call acc_set_device_type (acc_device_host) + + if (acc_get_device_type () .ne. acc_device_host) call abort + + n = 0 + + call acc_set_device_num (n, acc_device_host) + + if (acc_get_device_num (acc_device_host) .ne. 0) call abort + + if (.NOT. acc_async_test (n) ) call abort + + call acc_wait (n) + + call acc_wait_all () + + call acc_shutdown (acc_device_host) + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 new file mode 100644 index 00000000000..e68eb890e63 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 @@ -0,0 +1,31 @@ +! { dg-do run } + +program main + use openacc + implicit none + + integer n + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + n = 0 + + call acc_set_device_num (n, acc_device_nvidia) + + if (acc_get_device_num (acc_device_nvidia) .ne. 0) call abort + + if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then + + n = 1 + + call acc_set_device_num (n, acc_device_nvidia) + + if (acc_get_device_num (acc_device_nvidia) .ne. 
1) call abort + + end if + + call acc_shutdown (acc_device_nvidia) + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 new file mode 100644 index 00000000000..401ad661ee8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 @@ -0,0 +1,35 @@ +! { dg-do run } + +program main + implicit none + include "openacc_lib.h" + + integer n + + if (acc_get_num_devices (acc_device_host) .ne. 1) call abort + + if (acc_get_num_devices (acc_device_none) .ne. 0) call abort + + call acc_init (acc_device_host) + + if (acc_get_device_type () .ne. acc_device_host) call abort + + call acc_set_device_type (acc_device_host) + + if (acc_get_device_type () .ne. acc_device_host) call abort + + n = 0 + + call acc_set_device_num (n, acc_device_host) + + if (acc_get_device_num (acc_device_host) .ne. 0) call abort + + if (.NOT. acc_async_test (n) ) call abort + + call acc_wait (n) + + call acc_wait_all () + + call acc_shutdown (acc_device_host) + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 new file mode 100644 index 00000000000..422df53bd3a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 @@ -0,0 +1,31 @@ +! { dg-do run } + +program main + implicit none + include "openacc_lib.h" + + integer n + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + n = 0 + + call acc_set_device_num (n, acc_device_nvidia) + + if (acc_get_device_num (acc_device_nvidia) .ne. 0) call abort + + if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then + + n = 1 + + call acc_set_device_num (n, acc_device_nvidia) + + if (acc_get_device_num (acc_device_nvidia) .ne. 
1) call abort + + end if + + call acc_shutdown (acc_device_nvidia) + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 new file mode 100644 index 00000000000..ad758b2e9d5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 @@ -0,0 +1,83 @@ +! { dg-do run } + +program main + use openacc + use iso_c_binding + implicit none + + integer, target :: a_3d_i(10, 10, 10) + complex a_3d_c(10, 10, 10) + real a_3d_r(10, 10, 10) + + integer i, j, k + complex c + real r + integer, parameter :: i_size = sizeof (i) + integer, parameter :: c_size = sizeof (c) + integer, parameter :: r_size = sizeof (r) + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r) + + call acc_copyin (a_3d_i) + call acc_copyin (a_3d_c) + call acc_copyin (a_3d_r) + + if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort + + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c(i, j, k), i_size) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_r(i, j, k), i_size) .neqv. .TRUE.) 
call abort + end do + end do + end do + + call acc_shutdown (acc_device_nvidia) + +contains + + subroutine set3d (clear, a_i, a_c, a_r) + logical clear + integer, dimension (:,:,:), intent (inout) :: a_i + complex, dimension (:,:,:), intent (inout) :: a_c + real, dimension (:,:,:), intent (inout) :: a_r + + integer i, j, k + integer lb1, ub1, lb2, ub2, lb3, ub3 + + lb1 = lbound (a_i, 1) + ub1 = ubound (a_i, 1) + + lb2 = lbound (a_i, 2) + ub2 = ubound (a_i, 2) + + lb3 = lbound (a_i, 3) + ub3 = ubound (a_i, 3) + + do i = lb1, ub1 + do j = lb2, ub2 + do k = lb3, ub3 + if (clear) then + a_i(i, j, k) = 0 + a_c(i, j, k) = cmplx (0.0, 0.0) + a_r(i, j, k) = 0.0 + else + a_i(i, j, k) = i + a_c(i, j, k) = cmplx (i, j) + a_r(i, j, k) = i + end if + end do + end do + end do + + end subroutine + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 new file mode 100644 index 00000000000..082dd8a1d6e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 @@ -0,0 +1,97 @@ +program map + integer, parameter :: n = 20, c = 10 + integer :: i, a(n), b(n) + + a(:) = 0 + b(:) = 0 + + ! COPY + + !$acc parallel copy (a) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + do i = 1, n + b(i) = i + end do + + call check (a, b, n) + + ! COPYOUT + + a(:) = 0 + + !$acc parallel copyout (a) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + do i = 1, n + if (a(i) .ne. b(i)) call abort + end do + call check (a, b, n) + + ! COPYIN + + a(:) = 0 + + !$acc parallel copyout (a) copyin (b) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPY + + !$acc parallel pcopy (a) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! 
PRESENT_OR_COPYOUT + + a(:) = 0 + + !$acc parallel pcopyout (a) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPYIN + + a(:) = 0 + + !$acc parallel pcopyout (a) pcopyin (b) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) +end program map + +subroutine check (a, b, n) + integer :: n, a(n), b(n) + integer :: i + + do i = 1, n + if (a(i) .ne. b(i)) call abort + end do +end subroutine check diff --git a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f new file mode 100644 index 00000000000..db3c6b18540 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f @@ -0,0 +1,9 @@ +! { dg-do run } + + program main + implicit none + include "openacc_lib.h" + + if (openacc_version .ne. 201306) call abort; + + end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 new file mode 100644 index 00000000000..a14ecdd5032 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 @@ -0,0 +1,9 @@ +! { dg-do run } + +program main + use openacc + implicit none + + if (openacc_version .ne. 201306) call abort; + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90 new file mode 100644 index 00000000000..a5e1fcbace4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90 @@ -0,0 +1,21 @@ +! PR middle-end/63247 + +program test + implicit none + + integer(kind=2) a(4) + + a = 10; + + !$acc parallel copy(a(2:4)) + a(2) = 52 + a(3) = 53 + a(4) = 54 + !$acc end parallel + + if (a(1) .ne. 10) call abort + if (a(2) .ne. 52) call abort + if (a(3) .ne. 53) call abort + if (a(4) .ne. 
54) call abort + +end program test diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 new file mode 100644 index 00000000000..1a1d4c724f8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 @@ -0,0 +1,229 @@ +! { dg-do run } + +program test + implicit none + integer, allocatable :: a1(:) + integer, allocatable :: b1(:) + integer, allocatable :: c1(:) + integer, allocatable :: b2(:,:) + integer, allocatable :: c3(:,:,:) + + allocate (a1(5)) + if (.not.allocated (a1)) call abort() + + a1 = 10 + + !$acc parallel copy(a1(1:5)) + a1(1) = 1 + a1(2) = 2 + a1(3) = 3 + a1(4) = 4 + a1(5) = 5 + !$acc end parallel + + if (a1(1) .ne. 1) call abort + if (a1(2) .ne. 2) call abort + if (a1(3) .ne. 3) call abort + if (a1(4) .ne. 4) call abort + if (a1(5) .ne. 5) call abort + + deallocate(a1) + + allocate (a1(0:4)) + if (.not.allocated (a1)) call abort() + + a1 = 10 + + !$acc parallel copy(a1(0:4)) + a1(0) = 1 + a1(1) = 2 + a1(2) = 3 + a1(3) = 4 + a1(4) = 5 + !$acc end parallel + + if (a1(0) .ne. 1) call abort + if (a1(1) .ne. 2) call abort + if (a1(2) .ne. 3) call abort + if (a1(3) .ne. 4) call abort + if (a1(4) .ne. 5) call abort + + deallocate(a1) + + allocate (b2(5,5)) + if (.not.allocated (b2)) call abort() + + b2 = 11 + + !$acc parallel copy(b2(1:5,1:5)) + b2(1,1) = 1 + b2(2,2) = 2 + b2(3,3) = 3 + b2(4,4) = 4 + b2(5,5) = 5 + !$acc end parallel + + if (b2(1,1) .ne. 1) call abort + if (b2(2,2) .ne. 2) call abort + if (b2(3,3) .ne. 3) call abort + if (b2(4,4) .ne. 4) call abort + if (b2(5,5) .ne. 5) call abort + + deallocate(b2) + + allocate (b2(0:4,0:4)) + if (.not.allocated (b2)) call abort() + + b2 = 11 + + !$acc parallel copy(b2(0:4,0:4)) + b2(0,0) = 1 + b2(1,1) = 2 + b2(2,2) = 3 + b2(3,3) = 4 + b2(4,4) = 5 + !$acc end parallel + + if (b2(0,0) .ne. 1) call abort + if (b2(1,1) .ne. 2) call abort + if (b2(2,2) .ne. 3) call abort + if (b2(3,3) .ne. 4) call abort + if (b2(4,4) .ne. 
5) call abort + + deallocate(b2) + + allocate (c3(5,5,5)) + if (.not.allocated (c3)) call abort() + + c3 = 12 + + !$acc parallel copy(c3(1:5,1:5,1:5)) + c3(1,1,1) = 1 + c3(2,2,2) = 2 + c3(3,3,3) = 3 + c3(4,4,4) = 4 + c3(5,5,5) = 5 + !$acc end parallel + + if (c3(1,1,1) .ne. 1) call abort + if (c3(2,2,2) .ne. 2) call abort + if (c3(3,3,3) .ne. 3) call abort + if (c3(4,4,4) .ne. 4) call abort + if (c3(5,5,5) .ne. 5) call abort + + deallocate(c3) + + allocate (c3(0:4,0:4,0:4)) + if (.not.allocated (c3)) call abort() + + c3 = 12 + + !$acc parallel copy(c3(0:4,0:4,0:4)) + c3(0,0,0) = 1 + c3(1,1,1) = 2 + c3(2,2,2) = 3 + c3(3,3,3) = 4 + c3(4,4,4) = 5 + !$acc end parallel + + if (c3(0,0,0) .ne. 1) call abort + if (c3(1,1,1) .ne. 2) call abort + if (c3(2,2,2) .ne. 3) call abort + if (c3(3,3,3) .ne. 4) call abort + if (c3(4,4,4) .ne. 5) call abort + + deallocate(c3) + + allocate (a1(5)) + if (.not.allocated (a1)) call abort() + + allocate (b1(5)) + if (.not.allocated (b1)) call abort() + + allocate (c1(5)) + if (.not.allocated (c1)) call abort() + + a1 = 10 + b1 = 3 + c1 = 7 + + !$acc parallel copyin(a1(1:5)) create(c1(1:5)) copyout(b1(1:5)) + c1(1) = a1(1) + c1(2) = a1(2) + c1(3) = a1(3) + c1(4) = a1(4) + c1(5) = a1(5) + + b1(1) = c1(1) + b1(2) = c1(2) + b1(3) = c1(3) + b1(4) = c1(4) + b1(5) = c1(5) + !$acc end parallel + + if (b1(1) .ne. 10) call abort + if (b1(2) .ne. 10) call abort + if (b1(3) .ne. 10) call abort + if (b1(4) .ne. 10) call abort + if (b1(5) .ne. 
10) call abort + + deallocate(a1) + deallocate(b1) + deallocate(c1) + + allocate (a1(0:4)) + if (.not.allocated (a1)) call abort() + + allocate (b1(0:4)) + if (.not.allocated (b1)) call abort() + + allocate (c1(0:4)) + if (.not.allocated (c1)) call abort() + + a1 = 10 + b1 = 3 + c1 = 7 + + !$acc parallel copyin(a1(0:4)) create(c1(0:4)) copyout(b1(0:4)) + c1(0) = a1(0) + c1(1) = a1(1) + c1(2) = a1(2) + c1(3) = a1(3) + c1(4) = a1(4) + + b1(0) = c1(0) + b1(1) = c1(1) + b1(2) = c1(2) + b1(3) = c1(3) + b1(4) = c1(4) + !$acc end parallel + + if (b1(0) .ne. 10) call abort + if (b1(1) .ne. 10) call abort + if (b1(2) .ne. 10) call abort + if (b1(3) .ne. 10) call abort + if (b1(4) .ne. 10) call abort + + deallocate(a1) + deallocate(b1) + deallocate(c1) + + allocate (a1(5)) + if (.not.allocated (a1)) call abort() + + a1 = 10 + + !$acc parallel copy(a1(2:3)) + a1(2) = 2 + a1(3) = 3 + !$acc end parallel + + if (a1(1) .ne. 10) call abort + if (a1(2) .ne. 2) call abort + if (a1(3) .ne. 3) call abort + if (a1(4) .ne. 10) call abort + if (a1(5) .ne. 10) call abort + + deallocate(a1) + +end program test diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 new file mode 100644 index 00000000000..89e7fe7fe58 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 @@ -0,0 +1,225 @@ +! { dg-do run } + +! Integer reductions + +program reduction_1 + implicit none + + integer, parameter :: n = 10, vl = 2 + integer :: i, vresult, result + logical :: lresult, lvresult + integer, dimension (n) :: array + + do i = 1, n + array(i) = i + end do + + result = 0 + vresult = 0 + + ! '+' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(+:result) + do i = 1, n + result = result + array(i) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = vresult + array(i) + end do + + if (result.ne.vresult) call abort + + result = 1 + vresult = 1 + + ! 
'*' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(*:result) + do i = 1, n + result = result * array(i) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = vresult * array(i) + end do + + if (result.ne.vresult) call abort + + result = 0 + vresult = 0 + + ! 'max' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(max:result) + do i = 1, n + result = max (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = max (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + result = 1 + vresult = 1 + + ! 'min' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(min:result) + do i = 1, n + result = min (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = min (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + result = 1 + vresult = 1 + + ! 'iand' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(iand:result) + do i = 1, n + result = iand (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = iand (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + result = 1 + vresult = 1 + + ! 'ior' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(ior:result) + do i = 1, n + result = ior (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = ior (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + result = 0 + vresult = 0 + + ! 'ieor' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(ieor:result) + do i = 1, n + result = ieor (result, array(i)) + end do + !$acc end parallel + + ! 
Verify the results + do i = 1, n + vresult = ieor (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.and.' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(.and.:lresult) + do i = 1, n + lresult = lresult .and. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .and. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.or.' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(.or.:lresult) + do i = 1, n + lresult = lresult .or. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .or. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.eqv.' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(.eqv.:lresult) + do i = 1, n + lresult = lresult .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .eqv. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.neqv.' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(.neqv.:lresult) + do i = 1, n + lresult = lresult .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .neqv. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort +end program reduction_1 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 new file mode 100644 index 00000000000..d3659c9fc3e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 @@ -0,0 +1,170 @@ +! { dg-do run } + +! 
real reductions + +program reduction_2 + implicit none + + integer, parameter :: n = 10, vl = 2 + integer :: i + real, parameter :: e = .001 + real :: vresult, result + logical :: lresult, lvresult + real, dimension (n) :: array + + do i = 1, n + array(i) = i + end do + + result = 0 + vresult = 0 + + ! '+' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(+:result) + do i = 1, n + result = result + array(i) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = vresult + array(i) + end do + + if (abs (result - vresult) .ge. e) call abort + + result = 1 + vresult = 1 + + ! '*' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(*:result) + do i = 1, n + result = result * array(i) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = vresult * array(i) + end do + + if (result.ne.vresult) call abort + + result = 0 + vresult = 0 + + ! 'max' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(max:result) + do i = 1, n + result = max (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = max (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + result = 1 + vresult = 1 + + ! 'min' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(min:result) + do i = 1, n + result = min (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = min (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + result = 1 + vresult = 1 + + ! '.and.' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(.and.:lresult) + do i = 1, n + lresult = lresult .and. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .and. (array(i) .ge. 5) + end do + + if (result.ne.vresult) call abort + + lresult = .false. 
+ lvresult = .false. + + ! '.or.' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(.or.:lresult) + do i = 1, n + lresult = lresult .or. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .or. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.eqv.' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(.eqv.:lresult) + do i = 1, n + lresult = lresult .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .eqv. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.neqv.' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(.neqv.:lresult) + do i = 1, n + lresult = lresult .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .neqv. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort +end program reduction_2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 new file mode 100644 index 00000000000..2b8005dac15 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 @@ -0,0 +1,170 @@ +! { dg-do run } + +! double precision reductions + +program reduction_3 + implicit none + + integer, parameter :: n = 10, vl = 2 + integer :: i + double precision, parameter :: e = .001 + double precision :: vresult, result + logical :: lresult, lvresult + double precision, dimension (n) :: array + + do i = 1, n + array(i) = i + end do + + result = 0 + vresult = 0 + + ! '+' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(+:result) + do i = 1, n + result = result + array(i) + end do + !$acc end parallel + + ! 
Verify the results + do i = 1, n + vresult = vresult + array(i) + end do + + if (abs (result - vresult) .ge. e) call abort + + result = 1 + vresult = 1 + + ! '*' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(*:result) + do i = 1, n + result = result * array(i) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = vresult * array(i) + end do + + if (result.ne.vresult) call abort + + result = 0 + vresult = 0 + + ! 'max' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(max:result) + do i = 1, n + result = max (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = max (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + result = 1 + vresult = 1 + + ! 'min' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(min:result) + do i = 1, n + result = min (result, array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = min (vresult, array(i)) + end do + + if (result.ne.vresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.and.' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(.and.:lresult) + do i = 1, n + lresult = lresult .and. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .and. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.or.' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(.or.:lresult) + do i = 1, n + lresult = lresult .or. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .or. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.eqv.' 
reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(.eqv.:lresult) + do i = 1, n + lresult = lresult .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .eqv. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort + + lresult = .false. + lvresult = .false. + + ! '.neqv.' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(.neqv.:lresult) + do i = 1, n + lresult = lresult .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + lvresult = lvresult .neqv. (array(i) .ge. 5) + end do + + if (lresult .neqv. lvresult) call abort +end program reduction_3 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 new file mode 100644 index 00000000000..12f7a33053c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 @@ -0,0 +1,54 @@ +! { dg-do run } + +! complex reductions + +program reduction_4 + implicit none + + integer, parameter :: n = 10, vl = 32 + integer :: i + complex :: vresult, result + complex, dimension (n) :: array + + do i = 1, n + array(i) = i + end do + + result = 0 + vresult = 0 + + ! '+' reductions + + !$acc parallel vector_length(vl) num_gangs(1) + !$acc loop reduction(+:result) + do i = 1, n + result = result + array(i) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = vresult + array(i) + end do + + if (result .ne. vresult) call abort + + result = 1 + vresult = 1 + +! ! '*' reductions +! +! !$acc parallel vector_length(vl) +! !$acc loop reduction(*:result) +! do i = 1, n +! result = result * array(i) +! end do +! !$acc end parallel +! +! ! Verify the results +! do i = 1, n +! vresult = vresult * array(i) +! end do +! +! 
if (result.ne.vresult) call abort +end program reduction_4 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 new file mode 100644 index 00000000000..df44a7ae69d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 @@ -0,0 +1,32 @@ +! { dg-do run } + +! subroutine reduction + +program reduction + integer, parameter :: n = 40, c = 10 + integer :: i, vsum, sum + + call redsub (sum, n, c) + + vsum = 0 + + ! Verify the results + do i = 1, n + vsum = vsum + c + end do + + if (sum.ne.vsum) call abort () +end program reduction + +subroutine redsub(sum, n, c) + integer :: sum, n, c + + sum = 0 + + !$acc parallel vector_length(n) copyin (n, c) num_gangs(1) + !$acc loop reduction(+:sum) + do i = 1, n + sum = sum + c + end do + !$acc end parallel +end subroutine redsub diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 new file mode 100644 index 00000000000..63254318954 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 @@ -0,0 +1,30 @@ +! { dg-do run } + +program reduction + implicit none + + integer, parameter :: n = 100 + integer :: i, s1, s2, vs1, vs2 + + s1 = 0 + s2 = 0 + vs1 = 0 + vs2 = 0 + + !$acc parallel vector_length (1000) + !$acc loop reduction(+:s1, s2) + do i = 1, n + s1 = s1 + 1 + s2 = s2 + 2 + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vs1 = vs1 + 1 + vs2 = vs2 + 2 + end do + + if (s1.ne.vs1) call abort () + if (s2.ne.vs2) call abort () +end program reduction diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-1.f90 new file mode 100644 index 00000000000..33905157d49 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-1.f90 @@ -0,0 +1,32 @@ +! { dg-do run } +! 
{ dg-options "-fno-inline" } + + interface + recursive function fact (x) + !$acc routine + integer, intent(in) :: x + integer :: fact + end function fact + end interface + integer, parameter :: n = 10 + integer :: a(n), i + !$acc parallel + !$acc loop + do i = 1, n + a(i) = fact (i) + end do + !$acc end parallel + do i = 1, n + if (a(i) .ne. fact(i)) call abort + end do +end +recursive function fact (x) result (res) + !$acc routine + integer, intent(in) :: x + integer :: res + if (x < 1) then + res = 1 + else + res = x * fact (x - 1) + end if +end function fact diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-2.f90 new file mode 100644 index 00000000000..3d418b660c5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-2.f90 @@ -0,0 +1,29 @@ +! { dg-do run } +! { dg-options "-fno-inline" } + + module m1 + contains + recursive function fact (x) result (res) + !$acc routine + integer, intent(in) :: x + integer :: res + if (x < 1) then + res = 1 + else + res = x * fact (x - 1) + end if + end function fact + end module m1 + use m1 + integer, parameter :: n = 10 + integer :: a(n), i + !$acc parallel + !$acc loop + do i = 1, n + a(i) = fact (i) + end do + !$acc end parallel + do i = 1, n + if (a(i) .ne. fact(i)) call abort + end do +end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-3.f90 new file mode 100644 index 00000000000..d233a63ee5a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-3.f90 @@ -0,0 +1,27 @@ +! { dg-do run } +! { dg-options "-fno-inline" } + + integer, parameter :: n = 10 + integer :: a(n), i + integer, external :: fact + !$acc routine (fact) + !$acc parallel + !$acc loop + do i = 1, n + a(i) = fact (i) + end do + !$acc end parallel + do i = 1, n + if (a(i) .ne. 
fact(i)) call abort + end do +end +recursive function fact (x) result (res) + !$acc routine + integer, intent(in) :: x + integer :: res + if (x < 1) then + res = 1 + else + res = x * fact (x - 1) + end if +end function fact diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-4.f90 new file mode 100644 index 00000000000..3e5fb09c45c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-4.f90 @@ -0,0 +1,23 @@ +! { dg-do run } +! { dg-options "-fno-inline" } + + integer, parameter :: n = 10 + integer :: a(n), i + do i = 1, n + a(i) = i + end do + !$acc parallel + !$acc loop + do i = 1, n + call incr(a(i)) + end do + !$acc end parallel + do i = 1, n + if (a(i) .ne. (i + 1)) call abort + end do +end +subroutine incr (x) + !$acc routine + integer, intent(inout) :: x + x = x + 1 +end subroutine incr diff --git a/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90 new file mode 100644 index 00000000000..b39414fe3e5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90 @@ -0,0 +1,97 @@ +program subarrays + integer, parameter :: n = 20, c = 10 + integer :: i, a(n), b(n) + + a(:) = 0 + b(:) = 0 + + ! COPY + + !$acc parallel copy (a(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + do i = 1, n + b(i) = i + end do + + call check (a, b, n) + + ! COPYOUT + + a(:) = 0 + + !$acc parallel copyout (a(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + do i = 1, n + if (a(i) .ne. b(i)) call abort + end do + call check (a, b, n) + + ! COPYIN + + a(:) = 0 + + !$acc parallel copyout (a(1:n)) copyin (b(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPY + + !$acc parallel pcopy (a(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! 
PRESENT_OR_COPYOUT + + a(:) = 0 + + !$acc parallel pcopyout (a(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPYIN + + a(:) = 0 + + !$acc parallel pcopyout (a(1:n)) pcopyin (b(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) +end program subarrays + +subroutine check (a, b, n) + integer :: n, a(n), b(n) + integer :: i + + do i = 1, n + if (a(i) .ne. b(i)) call abort + end do +end subroutine check diff --git a/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90 new file mode 100644 index 00000000000..81799f60733 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90 @@ -0,0 +1,100 @@ +program subarrays + integer, parameter :: n = 20, c = 10, low = 5, high = 10 + integer :: i, a(n), b(n) + + a(:) = 0 + b(:) = 0 + + ! COPY + + !$acc parallel copy (a(low:high)) + !$acc loop + do i = low, high + a(i) = i + end do + !$acc end parallel + + do i = low, high + b(i) = i + end do + + call check (a, b, n) + + ! COPYOUT + + a(:) = 0 + + !$acc parallel copyout (a(low:high)) + !$acc loop + do i = low, high + a(i) = i + end do + !$acc end parallel + + do i = low, high + if (a(i) .ne. b(i)) call abort + end do + call check (a, b, n) + + ! COPYIN + + a(:) = 0 + + !$acc parallel copyout (a(low:high)) copyin (b(low:high)) + !$acc loop + do i = low, high + a(i) = b(i) + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPY + + a(:) = 0 + + !$acc parallel pcopy (a(low:high)) + !$acc loop + do i = low, high + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPYOUT + + a(:) = 0 + + !$acc parallel pcopyout (a(low:high)) + !$acc loop + do i = low, high + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! 
PRESENT_OR_COPYIN + + a(:) = 0 + + !$acc parallel pcopyout (a(low:high)) & + !$acc & pcopyin (b(low:high)) + !$acc loop + do i = low, high + a(i) = b(i) + end do + !$acc end parallel + + call check (a, b, n) +end program subarrays + +subroutine check (a, b, n) + integer :: n, a(n), b(n) + integer :: i + + do i = 1, n + if (a(i) .ne. b(i)) call abort + end do +end subroutine check diff --git a/liboffloadmic/ChangeLog b/liboffloadmic/ChangeLog index 9faa452f990..074926e3595 100644 --- a/liboffloadmic/ChangeLog +++ b/liboffloadmic/ChangeLog @@ -1,3 +1,8 @@ +2015-01-15 Thomas Schwinge + + * plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_get_name) + (GOMP_OFFLOAD_get_caps, GOMP_OFFLOAD_fini_device): New functions. + 2014-11-13 Ilya Verbin Andrey Turetskiy diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp index 0428b79b3e8..3e7a95860b6 100644 --- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp +++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp @@ -34,7 +34,7 @@ #include #include #include -#include +#include "libgomp-plugin.h" #include "compiler_if_host.h" #include "main_target_image.h" @@ -135,6 +135,22 @@ set_mic_lib_path (void) } } +extern "C" const char * +GOMP_OFFLOAD_get_name (void) +{ + const char *res = "intelmic"; + TRACE ("(): return %s", res); + return res; +} + +extern "C" unsigned int +GOMP_OFFLOAD_get_caps (void) +{ + unsigned int res = GOMP_OFFLOAD_CAP_OPENMP_400; + TRACE ("(): return %x", res); + return res; +} + extern "C" enum offload_target_type GOMP_OFFLOAD_get_type (void) { @@ -190,6 +206,14 @@ GOMP_OFFLOAD_init_device (int device) NULL, NULL); } +extern "C" void +GOMP_OFFLOAD_fini_device (int device) +{ + TRACE (""); + /* Unreachable for GOMP_OFFLOAD_CAP_OPENMP_400. */ + abort (); +} + static void get_target_table (int device, int &num_funcs, int &num_vars, void **&table) {