gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.

	include/
	* gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
	(GOMP_DIM_GANG, GOMP_DIM_WORKER, GOMP_DIM_VECTOR, GOMP_DIM_MAX,
	GOMP_DIM_MASK): New.
	(GOMP_LAUNCH_DIM, GOMP_LAUNCH_ASYNC, GOMP_LAUNCH_WAIT): New.
	(GOMP_LAUNCH_CODE_SHIFT, GOMP_LAUNCH_DEVICE_SHIFT,
	GOMP_LAUNCH_OP_SHIFT): New.
	(GOMP_LAUNCH_PACK, GOMP_LAUNCH_CODE, GOMP_LAUNCH_DEVICE,
	GOMP_LAUNCH_OP): New.
	(GOMP_LAUNCH_OP_MAX): New.

	libgomp/
	* libgomp.h (acc_dispatch_t): Replace separate geometry args with
	array.
	* libgomp.map (GOACC_parallel_keyed): New.
	* oacc-parallel.c (goacc_wait): Take pointer to va_list.  Adjust
	all callers.
	(GOACC_parallel_keyed): New interface.  Lose geometry arguments
	and take keyed varargs list.  Adjust call to exec_func.
	(GOACC_parallel): Force host fallback.
	* libgomp_g.h (GOACC_parallel): Remove.
	(GOACC_parallel_keyed): Declare.
	* plugin/plugin-nvptx.c (struct targ_fn_launch): New struct.
	(struct targ_fn_descriptor): Replace name field with launch field.
	(nvptx_exec): Lose separate geometry args, take array.  Process
	dynamic dimensions and adjust.
	(struct nvptx_tdata): Replace fn_names field with fn_descs.
	(GOMP_OFFLOAD_load_image): Adjust for change in function table
	data.
	(GOMP_OFFLOAD_openacc_parallel): Adjust for change in dimension
	passing.
	* oacc-host.c (host_openacc_exec): Adjust for change in dimension
	passing.

	gcc/
	* config/nvptx/nvptx.c: Include omp-low.h and gomp-constants.h.
	(nvptx_record_offload_symbol): Record function execution geometry.
	* config/nvptx/mkoffload.c (process): Include launch geometry in
	function data.
	* omp-low.c (oacc_launch_pack): New.
	(replace_oacc_fn_attrib): New.
	(set_oacc_fn_attrib): New.
	(get_oacc_fn_attrib): New.
	(expand_omp_target): Create keyed varargs for GOACC_parallel call
	generation.
	* omp-low.h (get_oacc_fn_attrib): Declare.
	* builtin-types.def (DEF_FUNCTION_TYPE_VAR_6): New.
	(DEF_FUNCTION_TYPE_VAR_11): Delete.
	* tree.h (OMP_CLAUSE_EXPR): New.
	* omp-builtins.def (BUILT_IN_GOACC_PARALLEL): Change target fn name.

	gcc/lto/
	* lto-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
	(DEF_FUNCTION_TYPE_VAR_11): Delete.

	gcc/c-family/
	* c-common.c (DEF_FUNCTION_TYPE_VAR_6): New.
	(DEF_FUNCTION_TYPE_VAR_11): Delete.

	gcc/fortran/
	* f95-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
	(DEF_FUNCTION_TYPE_VAR_11): Delete.
	* types.def (DEF_FUNCTION_TYPE_VAR_6): New.
	(DEF_FUNCTION_TYPE_VAR_11): Delete.

	gcc/ada/
	* gcc-interface/utils.c (DEF_FUNCTION_TYPE_VAR_6): Define.

From-SVN: r228220
This commit is contained in:
Nathan Sidwell 2015-09-28 19:37:33 +00:00
parent 4e671509d9
commit 3e32ee19a5
26 changed files with 511 additions and 200 deletions

View File

@ -1,3 +1,21 @@
2015-09-28 Nathan Sidwell <nathan@codesourcery.com>
* config/nvptx/nvptx.c: Include omp-low.h and gomp-constants.h.
(nvptx_record_offload_symbol): Record function execution geometry.
* config/nvptx/mkoffload.c (process): Include launch geometry in
function data.
* omp-low.c (oacc_launch_pack): New.
(replace_oacc_fn_attrib): New.
(set_oacc_fn_attrib): New.
(get_oacc_fn_attrib): New.
(expand_omp_target): Create keyed varargs for GOACC_parallel call
generation.
* omp-low.h (get_oacc_fn_attrib): Declare.
* builtin-types.def (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* tree.h (OMP_CLAUSE_EXPR): New.
* omp-builtins.def (BUILT_IN_GOACC_PARALLEL): Change target fn name.
2015-09-28 Aditya Kumar <aditya.k7@samsung.com> 2015-09-28 Aditya Kumar <aditya.k7@samsung.com>
Sebastian Pop <s.pop@samsung.com> Sebastian Pop <s.pop@samsung.com>

View File

@ -1,3 +1,7 @@
2015-09-28 Tom de Vries <tom@codesourcery.com>
* gcc-interface/utils.c (DEF_FUNCTION_TYPE_VAR_6): Define.
2015-09-17 Eric Botcazou <ebotcazou@adacore.com> 2015-09-17 Eric Botcazou <ebotcazou@adacore.com>
* gcc-interface/trans.c (emit_check): Do not touch TREE_SIDE_EFFECTS. * gcc-interface/trans.c (emit_check): Do not touch TREE_SIDE_EFFECTS.

View File

@ -5376,6 +5376,8 @@ enum c_builtin_type
#define DEF_FUNCTION_TYPE_VAR_4(NAME, RETURN, ARG1, ARG2, ARG3, ARG4) NAME, #define DEF_FUNCTION_TYPE_VAR_4(NAME, RETURN, ARG1, ARG2, ARG3, ARG4) NAME,
#define DEF_FUNCTION_TYPE_VAR_5(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \ #define DEF_FUNCTION_TYPE_VAR_5(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \
NAME, NAME,
#define DEF_FUNCTION_TYPE_VAR_6(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6) NAME,
#define DEF_FUNCTION_TYPE_VAR_7(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ #define DEF_FUNCTION_TYPE_VAR_7(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7) NAME, ARG6, ARG7) NAME,
#define DEF_FUNCTION_TYPE_VAR_11(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ #define DEF_FUNCTION_TYPE_VAR_11(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
@ -5398,6 +5400,7 @@ enum c_builtin_type
#undef DEF_FUNCTION_TYPE_VAR_3 #undef DEF_FUNCTION_TYPE_VAR_3
#undef DEF_FUNCTION_TYPE_VAR_4 #undef DEF_FUNCTION_TYPE_VAR_4
#undef DEF_FUNCTION_TYPE_VAR_5 #undef DEF_FUNCTION_TYPE_VAR_5
#undef DEF_FUNCTION_TYPE_VAR_6
#undef DEF_FUNCTION_TYPE_VAR_7 #undef DEF_FUNCTION_TYPE_VAR_7
#undef DEF_FUNCTION_TYPE_VAR_11 #undef DEF_FUNCTION_TYPE_VAR_11
#undef DEF_POINTER_TYPE #undef DEF_POINTER_TYPE
@ -5505,6 +5508,9 @@ install_builtin_function_types (void)
def_fn_type (ENUM, RETURN, 1, 4, ARG1, ARG2, ARG3, ARG4); def_fn_type (ENUM, RETURN, 1, 4, ARG1, ARG2, ARG3, ARG4);
#define DEF_FUNCTION_TYPE_VAR_5(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \ #define DEF_FUNCTION_TYPE_VAR_5(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \
def_fn_type (ENUM, RETURN, 1, 5, ARG1, ARG2, ARG3, ARG4, ARG5); def_fn_type (ENUM, RETURN, 1, 5, ARG1, ARG2, ARG3, ARG4, ARG5);
#define DEF_FUNCTION_TYPE_VAR_6(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6) \
def_fn_type (ENUM, RETURN, 1, 6, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6);
#define DEF_FUNCTION_TYPE_VAR_7(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ #define DEF_FUNCTION_TYPE_VAR_7(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7) \ ARG6, ARG7) \
def_fn_type (ENUM, RETURN, 1, 7, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7); def_fn_type (ENUM, RETURN, 1, 7, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7);
@ -5533,6 +5539,7 @@ install_builtin_function_types (void)
#undef DEF_FUNCTION_TYPE_VAR_3 #undef DEF_FUNCTION_TYPE_VAR_3
#undef DEF_FUNCTION_TYPE_VAR_4 #undef DEF_FUNCTION_TYPE_VAR_4
#undef DEF_FUNCTION_TYPE_VAR_5 #undef DEF_FUNCTION_TYPE_VAR_5
#undef DEF_FUNCTION_TYPE_VAR_6
#undef DEF_FUNCTION_TYPE_VAR_7 #undef DEF_FUNCTION_TYPE_VAR_7
#undef DEF_FUNCTION_TYPE_VAR_11 #undef DEF_FUNCTION_TYPE_VAR_11
#undef DEF_POINTER_TYPE #undef DEF_POINTER_TYPE

View File

@ -590,15 +590,14 @@ DEF_FUNCTION_TYPE_VAR_5 (BT_FN_INT_STRING_SIZE_INT_SIZE_CONST_STRING_VAR,
DEF_FUNCTION_TYPE_VAR_5 (BT_FN_INT_INT_INT_INT_INT_INT_VAR, DEF_FUNCTION_TYPE_VAR_5 (BT_FN_INT_INT_INT_INT_INT_INT_VAR,
BT_INT, BT_INT, BT_INT, BT_INT, BT_INT, BT_INT) BT_INT, BT_INT, BT_INT, BT_INT, BT_INT, BT_INT)
DEF_FUNCTION_TYPE_VAR_6 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
BT_PTR, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_VAR_7 (BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR, DEF_FUNCTION_TYPE_VAR_7 (BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
BT_VOID, BT_INT, BT_SIZE, BT_PTR, BT_PTR, BT_VOID, BT_INT, BT_SIZE, BT_PTR, BT_PTR,
BT_PTR, BT_INT, BT_INT) BT_PTR, BT_INT, BT_INT)
DEF_FUNCTION_TYPE_VAR_11 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR,
BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
BT_PTR, BT_PTR, BT_PTR, BT_INT, BT_INT, BT_INT,
BT_INT, BT_INT)
DEF_POINTER_TYPE (BT_PTR_FN_VOID_VAR, BT_FN_VOID_VAR) DEF_POINTER_TYPE (BT_PTR_FN_VOID_VAR, BT_FN_VOID_VAR)
DEF_FUNCTION_TYPE_3 (BT_FN_PTR_PTR_FN_VOID_VAR_PTR_SIZE, DEF_FUNCTION_TYPE_3 (BT_FN_PTR_PTR_FN_VOID_VAR_PTR_SIZE,
BT_PTR, BT_PTR_FN_VOID_VAR, BT_PTR, BT_SIZE) BT_PTR, BT_PTR_FN_VOID_VAR, BT_PTR, BT_SIZE)

View File

@ -1,3 +1,8 @@
2015-09-28 Nathan Sidwell <nathan@codesourcery.com>
* c-common.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
2015-09-25 Marek Polacek <polacek@redhat.com> 2015-09-25 Marek Polacek <polacek@redhat.com>
* c-ubsan.c (ubsan_instrument_division): Remove unnecessary code. * c-ubsan.c (ubsan_instrument_division): Remove unnecessary code.

View File

@ -5548,10 +5548,10 @@ enum c_builtin_type
#define DEF_FUNCTION_TYPE_VAR_4(NAME, RETURN, ARG1, ARG2, ARG3, ARG4) NAME, #define DEF_FUNCTION_TYPE_VAR_4(NAME, RETURN, ARG1, ARG2, ARG3, ARG4) NAME,
#define DEF_FUNCTION_TYPE_VAR_5(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \ #define DEF_FUNCTION_TYPE_VAR_5(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \
NAME, NAME,
#define DEF_FUNCTION_TYPE_VAR_6(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6) NAME,
#define DEF_FUNCTION_TYPE_VAR_7(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ #define DEF_FUNCTION_TYPE_VAR_7(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7) NAME, ARG6, ARG7) NAME,
#define DEF_FUNCTION_TYPE_VAR_11(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) NAME,
#define DEF_POINTER_TYPE(NAME, TYPE) NAME, #define DEF_POINTER_TYPE(NAME, TYPE) NAME,
#include "builtin-types.def" #include "builtin-types.def"
#undef DEF_PRIMITIVE_TYPE #undef DEF_PRIMITIVE_TYPE
@ -5570,8 +5570,8 @@ enum c_builtin_type
#undef DEF_FUNCTION_TYPE_VAR_3 #undef DEF_FUNCTION_TYPE_VAR_3
#undef DEF_FUNCTION_TYPE_VAR_4 #undef DEF_FUNCTION_TYPE_VAR_4
#undef DEF_FUNCTION_TYPE_VAR_5 #undef DEF_FUNCTION_TYPE_VAR_5
#undef DEF_FUNCTION_TYPE_VAR_6
#undef DEF_FUNCTION_TYPE_VAR_7 #undef DEF_FUNCTION_TYPE_VAR_7
#undef DEF_FUNCTION_TYPE_VAR_11
#undef DEF_POINTER_TYPE #undef DEF_POINTER_TYPE
BT_LAST BT_LAST
}; };
@ -5664,13 +5664,12 @@ c_define_builtins (tree va_list_ref_type_node, tree va_list_arg_type_node)
def_fn_type (ENUM, RETURN, 1, 4, ARG1, ARG2, ARG3, ARG4); def_fn_type (ENUM, RETURN, 1, 4, ARG1, ARG2, ARG3, ARG4);
#define DEF_FUNCTION_TYPE_VAR_5(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \ #define DEF_FUNCTION_TYPE_VAR_5(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \
def_fn_type (ENUM, RETURN, 1, 5, ARG1, ARG2, ARG3, ARG4, ARG5); def_fn_type (ENUM, RETURN, 1, 5, ARG1, ARG2, ARG3, ARG4, ARG5);
#define DEF_FUNCTION_TYPE_VAR_6(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6) \
def_fn_type (ENUM, RETURN, 1, 6, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6);
#define DEF_FUNCTION_TYPE_VAR_7(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ #define DEF_FUNCTION_TYPE_VAR_7(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7) \ ARG6, ARG7) \
def_fn_type (ENUM, RETURN, 1, 7, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7); def_fn_type (ENUM, RETURN, 1, 7, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7);
#define DEF_FUNCTION_TYPE_VAR_11(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) \
def_fn_type (ENUM, RETURN, 1, 11, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, \
ARG7, ARG8, ARG9, ARG10, ARG11);
#define DEF_POINTER_TYPE(ENUM, TYPE) \ #define DEF_POINTER_TYPE(ENUM, TYPE) \
builtin_types[(int) ENUM] = build_pointer_type (builtin_types[(int) TYPE]); builtin_types[(int) ENUM] = build_pointer_type (builtin_types[(int) TYPE]);
@ -5692,8 +5691,8 @@ c_define_builtins (tree va_list_ref_type_node, tree va_list_arg_type_node)
#undef DEF_FUNCTION_TYPE_VAR_3 #undef DEF_FUNCTION_TYPE_VAR_3
#undef DEF_FUNCTION_TYPE_VAR_4 #undef DEF_FUNCTION_TYPE_VAR_4
#undef DEF_FUNCTION_TYPE_VAR_5 #undef DEF_FUNCTION_TYPE_VAR_5
#undef DEF_FUNCTION_TYPE_VAR_6
#undef DEF_FUNCTION_TYPE_VAR_7 #undef DEF_FUNCTION_TYPE_VAR_7
#undef DEF_FUNCTION_TYPE_VAR_11
#undef DEF_POINTER_TYPE #undef DEF_POINTER_TYPE
builtin_types[(int) BT_LAST] = NULL_TREE; builtin_types[(int) BT_LAST] = NULL_TREE;

View File

@ -842,6 +842,8 @@ process (FILE *in, FILE *out)
{ {
const char *input = read_file (in); const char *input = read_file (in);
Token *tok = tokenize (input); Token *tok = tokenize (input);
const char *comma;
id_map const *id;
do do
tok = parse_file (tok); tok = parse_file (tok);
@ -853,21 +855,25 @@ process (FILE *in, FILE *out)
write_stmts (out, rev_stmts (fns)); write_stmts (out, rev_stmts (fns));
fprintf (out, ";\n\n"); fprintf (out, ";\n\n");
fprintf (out, "static const char *const var_mappings[] = {\n"); fprintf (out, "static const char *const var_mappings[] = {");
for (id_map *id = var_ids; id; id = id->next) for (comma = "", id = var_ids; id; comma = ",", id = id->next)
fprintf (out, "\t\"%s\"%s\n", id->ptx_name, id->next ? "," : ""); fprintf (out, "%s\n\t%s", comma, id->ptx_name);
fprintf (out, "};\n\n"); fprintf (out, "\n};\n\n");
fprintf (out, "static const char *const func_mappings[] = {\n");
for (id_map *id = func_ids; id; id = id->next) fprintf (out, "static const struct nvptx_fn {\n"
fprintf (out, "\t\"%s\"%s\n", id->ptx_name, id->next ? "," : ""); " const char *name;\n"
fprintf (out, "};\n\n"); " unsigned short dim[3];\n"
"} func_mappings[] = {\n");
for (comma = "", id = func_ids; id; comma = ",", id = id->next)
fprintf (out, "%s\n\t{%s}", comma, id->ptx_name);
fprintf (out, "\n};\n\n");
fprintf (out, fprintf (out,
"static const struct nvptx_tdata {\n" "static const struct nvptx_tdata {\n"
" const char *ptx_src;\n" " const char *ptx_src;\n"
" const char *const *var_names;\n" " const char *const *var_names;\n"
" __SIZE_TYPE__ var_num;\n" " __SIZE_TYPE__ var_num;\n"
" const char *const *fn_names;\n" " const struct nvptx_fn *fn_names;\n"
" __SIZE_TYPE__ fn_num;\n" " __SIZE_TYPE__ fn_num;\n"
"} target_data = {\n" "} target_data = {\n"
" ptx_code,\n" " ptx_code,\n"

View File

@ -56,6 +56,8 @@
#include "cfgrtl.h" #include "cfgrtl.h"
#include "stor-layout.h" #include "stor-layout.h"
#include "builtins.h" #include "builtins.h"
#include "omp-low.h"
#include "gomp-constants.h"
/* This file should be included last. */ /* This file should be included last. */
#include "target-def.h" #include "target-def.h"
@ -2066,9 +2068,51 @@ nvptx_vector_alignment (const_tree type)
static void static void
nvptx_record_offload_symbol (tree decl) nvptx_record_offload_symbol (tree decl)
{ {
fprintf (asm_out_file, "//:%s_MAP %s\n", switch (TREE_CODE (decl))
TREE_CODE (decl) == VAR_DECL ? "VAR" : "FUNC", {
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); case VAR_DECL:
fprintf (asm_out_file, "//:VAR_MAP \"%s\"\n",
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
break;
case FUNCTION_DECL:
{
tree attr = get_oacc_fn_attrib (decl);
tree dims = NULL_TREE;
unsigned ix;
if (attr)
dims = TREE_VALUE (attr);
fprintf (asm_out_file, "//:FUNC_MAP \"%s\"",
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
for (ix = 0; ix != GOMP_DIM_MAX; ix++)
{
int size = 1;
/* TODO: This check can go away once the dimension default
machinery is merged to trunk. */
if (dims)
{
tree dim = TREE_VALUE (dims);
if (dim)
size = TREE_INT_CST_LOW (dim);
gcc_assert (!TREE_PURPOSE (dims));
dims = TREE_CHAIN (dims);
}
fprintf (asm_out_file, ", %#x", size);
}
fprintf (asm_out_file, "\n");
}
break;
default:
gcc_unreachable ();
}
} }
/* Implement TARGET_ASM_FILE_START. Write the kinds of things ptxas expects /* Implement TARGET_ASM_FILE_START. Write the kinds of things ptxas expects

View File

@ -1,3 +1,10 @@
2015-09-28 Nathan Sidwell <nathan@codesourcery.com>
* f95-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* types.def (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
2015-09-26 Mikael Morin <mikael@gcc.gnu.org> 2015-09-26 Mikael Morin <mikael@gcc.gnu.org>
PR fortran/67721 PR fortran/67721

View File

@ -635,10 +635,10 @@ gfc_init_builtin_functions (void)
ARG6, ARG7, ARG8) NAME, ARG6, ARG7, ARG8) NAME,
#define DEF_FUNCTION_TYPE_VAR_0(NAME, RETURN) NAME, #define DEF_FUNCTION_TYPE_VAR_0(NAME, RETURN) NAME,
#define DEF_FUNCTION_TYPE_VAR_2(NAME, RETURN, ARG1, ARG2) NAME, #define DEF_FUNCTION_TYPE_VAR_2(NAME, RETURN, ARG1, ARG2) NAME,
#define DEF_FUNCTION_TYPE_VAR_6(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6) NAME,
#define DEF_FUNCTION_TYPE_VAR_7(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ #define DEF_FUNCTION_TYPE_VAR_7(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7) NAME, ARG6, ARG7) NAME,
#define DEF_FUNCTION_TYPE_VAR_11(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) NAME,
#define DEF_POINTER_TYPE(NAME, TYPE) NAME, #define DEF_POINTER_TYPE(NAME, TYPE) NAME,
#include "types.def" #include "types.def"
#undef DEF_PRIMITIVE_TYPE #undef DEF_PRIMITIVE_TYPE
@ -653,8 +653,8 @@ gfc_init_builtin_functions (void)
#undef DEF_FUNCTION_TYPE_8 #undef DEF_FUNCTION_TYPE_8
#undef DEF_FUNCTION_TYPE_VAR_0 #undef DEF_FUNCTION_TYPE_VAR_0
#undef DEF_FUNCTION_TYPE_VAR_2 #undef DEF_FUNCTION_TYPE_VAR_2
#undef DEF_FUNCTION_TYPE_VAR_6
#undef DEF_FUNCTION_TYPE_VAR_7 #undef DEF_FUNCTION_TYPE_VAR_7
#undef DEF_FUNCTION_TYPE_VAR_11
#undef DEF_POINTER_TYPE #undef DEF_POINTER_TYPE
BT_LAST BT_LAST
}; };
@ -1096,6 +1096,17 @@ gfc_init_builtin_functions (void)
builtin_types[(int) ARG1], \ builtin_types[(int) ARG1], \
builtin_types[(int) ARG2], \ builtin_types[(int) ARG2], \
NULL_TREE); NULL_TREE);
#define DEF_FUNCTION_TYPE_VAR_6(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6) \
builtin_types[(int) ENUM] \
= build_varargs_function_type_list (builtin_types[(int) RETURN], \
builtin_types[(int) ARG1], \
builtin_types[(int) ARG2], \
builtin_types[(int) ARG3], \
builtin_types[(int) ARG4], \
builtin_types[(int) ARG5], \
builtin_types[(int) ARG6], \
NULL_TREE);
#define DEF_FUNCTION_TYPE_VAR_7(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ #define DEF_FUNCTION_TYPE_VAR_7(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7) \ ARG6, ARG7) \
builtin_types[(int) ENUM] \ builtin_types[(int) ENUM] \
@ -1108,22 +1119,6 @@ gfc_init_builtin_functions (void)
builtin_types[(int) ARG6], \ builtin_types[(int) ARG6], \
builtin_types[(int) ARG7], \ builtin_types[(int) ARG7], \
NULL_TREE); NULL_TREE);
#define DEF_FUNCTION_TYPE_VAR_11(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) \
builtin_types[(int) ENUM] \
= build_varargs_function_type_list (builtin_types[(int) RETURN], \
builtin_types[(int) ARG1], \
builtin_types[(int) ARG2], \
builtin_types[(int) ARG3], \
builtin_types[(int) ARG4], \
builtin_types[(int) ARG5], \
builtin_types[(int) ARG6], \
builtin_types[(int) ARG7], \
builtin_types[(int) ARG8], \
builtin_types[(int) ARG9], \
builtin_types[(int) ARG10], \
builtin_types[(int) ARG11], \
NULL_TREE);
#define DEF_POINTER_TYPE(ENUM, TYPE) \ #define DEF_POINTER_TYPE(ENUM, TYPE) \
builtin_types[(int) ENUM] \ builtin_types[(int) ENUM] \
= build_pointer_type (builtin_types[(int) TYPE]); = build_pointer_type (builtin_types[(int) TYPE]);
@ -1140,8 +1135,8 @@ gfc_init_builtin_functions (void)
#undef DEF_FUNCTION_TYPE_8 #undef DEF_FUNCTION_TYPE_8
#undef DEF_FUNCTION_TYPE_VAR_0 #undef DEF_FUNCTION_TYPE_VAR_0
#undef DEF_FUNCTION_TYPE_VAR_2 #undef DEF_FUNCTION_TYPE_VAR_2
#undef DEF_FUNCTION_TYPE_VAR_6
#undef DEF_FUNCTION_TYPE_VAR_7 #undef DEF_FUNCTION_TYPE_VAR_7
#undef DEF_FUNCTION_TYPE_VAR_11
#undef DEF_POINTER_TYPE #undef DEF_POINTER_TYPE
builtin_types[(int) BT_LAST] = NULL_TREE; builtin_types[(int) BT_LAST] = NULL_TREE;

View File

@ -219,7 +219,6 @@ DEF_FUNCTION_TYPE_VAR_7 (BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
BT_VOID, BT_INT, BT_SIZE, BT_PTR, BT_PTR, BT_VOID, BT_INT, BT_SIZE, BT_PTR, BT_PTR,
BT_PTR, BT_INT, BT_INT) BT_PTR, BT_INT, BT_INT)
DEF_FUNCTION_TYPE_VAR_11 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR, DEF_FUNCTION_TYPE_VAR_6 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
BT_PTR, BT_PTR, BT_PTR, BT_INT, BT_INT, BT_INT, BT_PTR, BT_PTR, BT_PTR)
BT_INT, BT_INT)

View File

@ -1,3 +1,8 @@
2015-09-28 Nathan Sidwell <nathan@codesourcery.com>
* lto-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
2015-08-31 Richard Biener <rguenther@suse.de> 2015-08-31 Richard Biener <rguenther@suse.de>
* lto.c (compare_tree_sccs_1): Compare DECL_ABSTRACT_ORIGIN. * lto.c (compare_tree_sccs_1): Compare DECL_ABSTRACT_ORIGIN.

View File

@ -160,10 +160,10 @@ enum lto_builtin_type
#define DEF_FUNCTION_TYPE_VAR_4(NAME, RETURN, ARG1, ARG2, ARG3, ARG4) NAME, #define DEF_FUNCTION_TYPE_VAR_4(NAME, RETURN, ARG1, ARG2, ARG3, ARG4) NAME,
#define DEF_FUNCTION_TYPE_VAR_5(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG6) \ #define DEF_FUNCTION_TYPE_VAR_5(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG6) \
NAME, NAME,
#define DEF_FUNCTION_TYPE_VAR_6(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6) NAME,
#define DEF_FUNCTION_TYPE_VAR_7(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ #define DEF_FUNCTION_TYPE_VAR_7(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7) NAME, ARG6, ARG7) NAME,
#define DEF_FUNCTION_TYPE_VAR_11(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) NAME,
#define DEF_POINTER_TYPE(NAME, TYPE) NAME, #define DEF_POINTER_TYPE(NAME, TYPE) NAME,
#include "builtin-types.def" #include "builtin-types.def"
#undef DEF_PRIMITIVE_TYPE #undef DEF_PRIMITIVE_TYPE
@ -182,8 +182,8 @@ enum lto_builtin_type
#undef DEF_FUNCTION_TYPE_VAR_3 #undef DEF_FUNCTION_TYPE_VAR_3
#undef DEF_FUNCTION_TYPE_VAR_4 #undef DEF_FUNCTION_TYPE_VAR_4
#undef DEF_FUNCTION_TYPE_VAR_5 #undef DEF_FUNCTION_TYPE_VAR_5
#undef DEF_FUNCTION_TYPE_VAR_6
#undef DEF_FUNCTION_TYPE_VAR_7 #undef DEF_FUNCTION_TYPE_VAR_7
#undef DEF_FUNCTION_TYPE_VAR_11
#undef DEF_POINTER_TYPE #undef DEF_POINTER_TYPE
BT_LAST BT_LAST
}; };
@ -668,13 +668,12 @@ lto_define_builtins (tree va_list_ref_type_node ATTRIBUTE_UNUSED,
def_fn_type (ENUM, RETURN, 1, 4, ARG1, ARG2, ARG3, ARG4); def_fn_type (ENUM, RETURN, 1, 4, ARG1, ARG2, ARG3, ARG4);
#define DEF_FUNCTION_TYPE_VAR_5(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \ #define DEF_FUNCTION_TYPE_VAR_5(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \
def_fn_type (ENUM, RETURN, 1, 5, ARG1, ARG2, ARG3, ARG4, ARG5); def_fn_type (ENUM, RETURN, 1, 5, ARG1, ARG2, ARG3, ARG4, ARG5);
#define DEF_FUNCTION_TYPE_VAR_6(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6) \
def_fn_type (ENUM, RETURN, 1, 6, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6);
#define DEF_FUNCTION_TYPE_VAR_7(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \ #define DEF_FUNCTION_TYPE_VAR_7(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7) \ ARG6, ARG7) \
def_fn_type (ENUM, RETURN, 1, 7, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7); def_fn_type (ENUM, RETURN, 1, 7, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7);
#define DEF_FUNCTION_TYPE_VAR_11(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) \
def_fn_type (ENUM, RETURN, 1, 11, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, \
ARG7, ARG8, ARG9, ARG10, ARG11);
#define DEF_POINTER_TYPE(ENUM, TYPE) \ #define DEF_POINTER_TYPE(ENUM, TYPE) \
builtin_types[(int) ENUM] = build_pointer_type (builtin_types[(int) TYPE]); builtin_types[(int) ENUM] = build_pointer_type (builtin_types[(int) TYPE]);
@ -696,8 +695,8 @@ lto_define_builtins (tree va_list_ref_type_node ATTRIBUTE_UNUSED,
#undef DEF_FUNCTION_TYPE_VAR_3 #undef DEF_FUNCTION_TYPE_VAR_3
#undef DEF_FUNCTION_TYPE_VAR_4 #undef DEF_FUNCTION_TYPE_VAR_4
#undef DEF_FUNCTION_TYPE_VAR_5 #undef DEF_FUNCTION_TYPE_VAR_5
#undef DEF_FUNCTION_TYPE_VAR_6
#undef DEF_FUNCTION_TYPE_VAR_7 #undef DEF_FUNCTION_TYPE_VAR_7
#undef DEF_FUNCTION_TYPE_VAR_11
#undef DEF_POINTER_TYPE #undef DEF_POINTER_TYPE
builtin_types[(int) BT_LAST] = NULL_TREE; builtin_types[(int) BT_LAST] = NULL_TREE;

View File

@ -38,8 +38,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DATA_END, "GOACC_data_end",
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ENTER_EXIT_DATA, "GOACC_enter_exit_data", DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ENTER_EXIT_DATA, "GOACC_enter_exit_data",
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR, BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
ATTR_NOTHROW_LIST) ATTR_NOTHROW_LIST)
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel", DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel_keyed",
BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR, BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
ATTR_NOTHROW_LIST) ATTR_NOTHROW_LIST)
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update", DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update",
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR, BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,

View File

@ -82,7 +82,6 @@ along with GCC; see the file COPYING3. If not see
#include "lto-section-names.h" #include "lto-section-names.h"
#include "gomp-constants.h" #include "gomp-constants.h"
/* Lowering of OMP parallel and workshare constructs proceeds in two /* Lowering of OMP parallel and workshare constructs proceeds in two
phases. The first phase scans the function looking for OMP statements phases. The first phase scans the function looking for OMP statements
and then for variables that must be replaced to satisfy data sharing and then for variables that must be replaced to satisfy data sharing
@ -8869,6 +8868,110 @@ expand_omp_atomic (struct omp_region *region)
} }
/* Encode an OpenACC launch argument from tag CODE and operand OP.
   The constant part is produced by GOMP_LAUNCH_PACK with a device
   field of zero.  When DEVICE is non-null it is shifted left by
   GOMP_LAUNCH_DEVICE_SHIFT and OR'd into the result, so the value
   returned may be a folded expression rather than a constant.  This
   matches the GOMP_LAUNCH_PACK macro in gomp-constants.h.  No
   overflow checking is performed.  */
static tree
oacc_launch_pack (unsigned code, tree device, unsigned op)
{
  tree packed = build_int_cst (unsigned_type_node,
                               GOMP_LAUNCH_PACK (code, 0, op));

  /* Without a device expression the constant tag is the answer.  */
  if (!device)
    return packed;

  tree shift = build_int_cst (unsigned_type_node, GOMP_LAUNCH_DEVICE_SHIFT);
  tree shifted = fold_build2 (LSHIFT_EXPR, unsigned_type_node, device, shift);

  return fold_build2 (BIT_IOR_EXPR, unsigned_type_node, packed, shifted);
}
/* Look for compute grid dimension clauses and convert to an attribute
attached to FN. This permits the target-side code to (a) massage
the dimensions, (b) emit that data and (c) optimize. Non-constant
dimensions are pushed onto ARGS.
The attribute value is a TREE_LIST. A set of dimensions is
represented as a list of INTEGER_CST. Those that are runtime
expressions are represented as an INTEGER_CST of zero.
TODO: Normally the attribute will just contain a single such list. If
however it contains a list of lists, this will represent the use of
device_type. Each member of the outer list is an assoc list of
dimensions, keyed by the device type. The first entry will be the
default. Well, that's the plan. */
#define OACC_FN_ATTRIB "oacc function"
/* Attach DIMS to FN as the value of a new OACC_FN_ATTRIB attribute,
   placed at the front of FN's attribute list.  An existing
   OACC_FN_ATTRIB entry is dropped only when it is the first attribute
   on the list; entries further down the chain are left alone.  */
void
replace_oacc_fn_attrib (tree fn, tree dims)
{
  tree ident = get_identifier (OACC_FN_ATTRIB);
  tree head = DECL_ATTRIBUTES (fn);

  /* Skip over a stale entry of ours sitting at the head of the list.  */
  tree tail = (head && TREE_PURPOSE (head) == ident
               ? TREE_CHAIN (head) : head);

  DECL_ATTRIBUTES (fn) = tree_cons (ident, dims, tail);
}
/* Scan CLAUSES for the num_gangs, num_workers and vector_length
   clauses and record their expressions, in that order, as a TREE_LIST
   attached to FN via replace_oacc_fn_attrib.  A missing clause is
   recorded as a null value.  A dimension whose expression is not an
   INTEGER_CST is recorded as integer_zero_node; for those, a
   GOMP_LAUNCH_DIM tag carrying the mask of such dimensions is pushed
   onto ARGS, followed by the expressions themselves in dimension
   order.  */
static void
set_oacc_fn_attrib (tree fn, tree clauses, vec<tree> *args)
{
  /* Must match GOMP_DIM ordering.  */
  static const omp_clause_code dim_clauses[]
    = { OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS,
        OMP_CLAUSE_VECTOR_LENGTH };
  tree exprs[GOMP_DIM_MAX];
  tree dim_list = NULL_TREE;
  unsigned dynamic_mask = 0;

  /* Walk the dimensions from last to first so that consing onto the
     front leaves the list in ascending dimension order.  */
  for (unsigned axis = GOMP_DIM_MAX; axis-- != 0;)
    {
      tree clause = find_omp_clause (clauses, dim_clauses[axis]);
      tree expr = (clause
                   ? OMP_CLAUSE_EXPR (clause, dim_clauses[axis])
                   : NULL_TREE);
      tree recorded = expr;

      exprs[axis] = expr;
      if (expr && TREE_CODE (expr) != INTEGER_CST)
        {
          /* Runtime value: record zero and flag the dimension.  */
          recorded = integer_zero_node;
          dynamic_mask |= GOMP_DIM_MASK (axis);
        }
      dim_list = tree_cons (NULL_TREE, recorded, dim_list);
    }

  replace_oacc_fn_attrib (fn, dim_list);

  /* Nothing more to do when every dimension was constant or absent.  */
  if (!dynamic_mask)
    return;

  /* Push a dynamic argument set.  */
  args->safe_push (oacc_launch_pack (GOMP_LAUNCH_DIM, NULL_TREE,
                                     dynamic_mask));
  for (unsigned axis = 0; axis != GOMP_DIM_MAX; axis++)
    if (dynamic_mask & GOMP_DIM_MASK (axis))
      args->safe_push (exprs[axis]);
}
/* Look up the OACC_FN_ATTRIB attribute in FN's attribute list and
   return whatever lookup_attribute yields for it.  Functions without
   the attribute (i.e. non-OpenACC functions) give NULL.  */
tree
get_oacc_fn_attrib (tree fn)
{
  tree attribs = DECL_ATTRIBUTES (fn);

  return lookup_attribute (OACC_FN_ATTRIB, attribs);
}
/* Expand the GIMPLE_OMP_TARGET starting at REGION. */ /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
static void static void
@ -8889,10 +8992,10 @@ expand_omp_target (struct omp_region *region)
offloaded = is_gimple_omp_offloaded (entry_stmt); offloaded = is_gimple_omp_offloaded (entry_stmt);
switch (gimple_omp_target_kind (entry_stmt)) switch (gimple_omp_target_kind (entry_stmt))
{ {
case GF_OMP_TARGET_KIND_REGION:
case GF_OMP_TARGET_KIND_UPDATE:
case GF_OMP_TARGET_KIND_OACC_PARALLEL: case GF_OMP_TARGET_KIND_OACC_PARALLEL:
case GF_OMP_TARGET_KIND_OACC_KERNELS: case GF_OMP_TARGET_KIND_OACC_KERNELS:
case GF_OMP_TARGET_KIND_REGION:
case GF_OMP_TARGET_KIND_UPDATE:
case GF_OMP_TARGET_KIND_OACC_UPDATE: case GF_OMP_TARGET_KIND_OACC_UPDATE:
case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
data_region = false; data_region = false;
@ -9224,6 +9327,7 @@ expand_omp_target (struct omp_region *region)
} }
gimple *g; gimple *g;
bool tagging = false;
/* The maximum number used by any start_ix, without varargs. */ /* The maximum number used by any start_ix, without varargs. */
auto_vec<tree, 11> args; auto_vec<tree, 11> args;
args.quick_push (device); args.quick_push (device);
@ -9259,88 +9363,87 @@ expand_omp_target (struct omp_region *region)
break; break;
case BUILT_IN_GOACC_PARALLEL: case BUILT_IN_GOACC_PARALLEL:
{ {
tree t_num_gangs, t_num_workers, t_vector_length; set_oacc_fn_attrib (child_fn, clauses, &args);
tagging = true;
/* Default values for num_gangs, num_workers, and vector_length. */
t_num_gangs = t_num_workers = t_vector_length
= fold_convert_loc (gimple_location (entry_stmt),
integer_type_node, integer_one_node);
/* ..., but if present, use the value specified by the respective
clause, making sure that are of the correct type. */
c = find_omp_clause (clauses, OMP_CLAUSE_NUM_GANGS);
if (c)
t_num_gangs = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
integer_type_node,
OMP_CLAUSE_NUM_GANGS_EXPR (c));
c = find_omp_clause (clauses, OMP_CLAUSE_NUM_WORKERS);
if (c)
t_num_workers = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
integer_type_node,
OMP_CLAUSE_NUM_WORKERS_EXPR (c));
c = find_omp_clause (clauses, OMP_CLAUSE_VECTOR_LENGTH);
if (c)
t_vector_length = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
integer_type_node,
OMP_CLAUSE_VECTOR_LENGTH_EXPR (c));
args.quick_push (t_num_gangs);
args.quick_push (t_num_workers);
args.quick_push (t_vector_length);
} }
/* FALLTHRU */ /* FALLTHRU */
case BUILT_IN_GOACC_ENTER_EXIT_DATA: case BUILT_IN_GOACC_ENTER_EXIT_DATA:
case BUILT_IN_GOACC_UPDATE: case BUILT_IN_GOACC_UPDATE:
{ {
tree t_async; tree t_async = NULL_TREE;
int t_wait_idx;
/* Default values for t_async. */ /* If present, use the value specified by the respective
t_async = fold_convert_loc (gimple_location (entry_stmt),
integer_type_node,
build_int_cst (integer_type_node,
GOMP_ASYNC_SYNC));
/* ..., but if present, use the value specified by the respective
clause, making sure that is of the correct type. */ clause, making sure that is of the correct type. */
c = find_omp_clause (clauses, OMP_CLAUSE_ASYNC); c = find_omp_clause (clauses, OMP_CLAUSE_ASYNC);
if (c) if (c)
t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c), t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
integer_type_node, integer_type_node,
OMP_CLAUSE_ASYNC_EXPR (c)); OMP_CLAUSE_ASYNC_EXPR (c));
else if (!tagging)
args.quick_push (t_async); /* Default values for t_async. */
/* Save the index, and... */ t_async = fold_convert_loc (gimple_location (entry_stmt),
t_wait_idx = args.length (); integer_type_node,
/* ... push a default value. */ build_int_cst (integer_type_node,
args.quick_push (fold_convert_loc (gimple_location (entry_stmt), GOMP_ASYNC_SYNC));
integer_type_node, if (tagging && t_async)
integer_zero_node));
c = find_omp_clause (clauses, OMP_CLAUSE_WAIT);
if (c)
{ {
int n = 0; unsigned HOST_WIDE_INT i_async;
for (; c; c = OMP_CLAUSE_CHAIN (c)) if (TREE_CODE (t_async) == INTEGER_CST)
{ {
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT) /* See if we can pack the async arg in to the tag's
{ operand. */
args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c), i_async = TREE_INT_CST_LOW (t_async);
integer_type_node,
OMP_CLAUSE_WAIT_EXPR (c)));
n++;
}
}
/* Now that we know the number, replace the default value. */ if (i_async < GOMP_LAUNCH_OP_MAX)
args.ordered_remove (t_wait_idx); t_async = NULL_TREE;
args.quick_insert (t_wait_idx, }
fold_convert_loc (gimple_location (entry_stmt), if (t_async)
integer_type_node, i_async = GOMP_LAUNCH_OP_MAX;
build_int_cst (integer_type_node, n))); args.safe_push (oacc_launch_pack
(GOMP_LAUNCH_ASYNC, NULL_TREE, i_async));
}
if (t_async)
args.safe_push (t_async);
/* Save the argument index, and ... */
unsigned t_wait_idx = args.length ();
unsigned num_waits = 0;
c = find_omp_clause (clauses, OMP_CLAUSE_WAIT);
if (!tagging || c)
/* ... push a placeholder. */
args.safe_push (integer_zero_node);
for (; c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
{
args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
integer_type_node,
OMP_CLAUSE_WAIT_EXPR (c)));
num_waits++;
}
if (!tagging || num_waits)
{
tree len;
/* Now that we know the number, update the placeholder. */
if (tagging)
len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
else
len = build_int_cst (integer_type_node, num_waits);
len = fold_convert_loc (gimple_location (entry_stmt),
unsigned_type_node, len);
args[t_wait_idx] = len;
} }
} }
break; break;
default: default:
gcc_unreachable (); gcc_unreachable ();
} }
if (tagging)
/* Push terminal marker - zero. */
args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args); g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
gimple_set_location (g, gimple_location (entry_stmt)); gimple_set_location (g, gimple_location (entry_stmt));

View File

@ -29,6 +29,7 @@ extern tree omp_reduction_init_op (location_t, enum tree_code, tree);
extern tree omp_reduction_init (tree, tree); extern tree omp_reduction_init (tree, tree);
extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *); extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
extern void omp_finish_file (void); extern void omp_finish_file (void);
extern tree get_oacc_fn_attrib (tree);
extern GTY(()) vec<tree, va_gc> *offload_funcs; extern GTY(()) vec<tree, va_gc> *offload_funcs;
extern GTY(()) vec<tree, va_gc> *offload_vars; extern GTY(()) vec<tree, va_gc> *offload_vars;

View File

@ -1,3 +1,4 @@
/* Definitions for the ubiquitous 'tree' type for GNU compilers. /* Definitions for the ubiquitous 'tree' type for GNU compilers.
Copyright (C) 1989-2015 Free Software Foundation, Inc. Copyright (C) 1989-2015 Free Software Foundation, Inc.
@ -1369,6 +1370,8 @@ extern void protected_set_expr_location (tree, location_t);
OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_SCHEDULE), 0) OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_SCHEDULE), 0)
/* OpenACC clause expressions */ /* OpenACC clause expressions */
/* Generic accessor: operand 0 of clause NODE, after passing NODE
   through OMP_CLAUSE_SUBCODE_CHECK against the clause code CLAUSE.
   Unlike the per-clause *_EXPR accessors, the clause code is supplied
   by the caller.  */
#define OMP_CLAUSE_EXPR(NODE, CLAUSE) \
OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, CLAUSE), 0)
#define OMP_CLAUSE_GANG_EXPR(NODE) \ #define OMP_CLAUSE_GANG_EXPR(NODE) \
OMP_CLAUSE_OPERAND ( \ OMP_CLAUSE_OPERAND ( \
OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_GANG), 0) OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_GANG), 0)

View File

@ -1,3 +1,15 @@
2015-09-28 Nathan Sidwell <nathan@codesourcery.com>
* gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
(GOMP_DIM_GANG, GOMP_DIM_WORKER, GOMP_DIM_VECTOR, GOMP_DIM_MAX,
GOMP_DIM_MASK): New.
(GOMP_LAUNCH_DIM, GOMP_LAUNCH_ASYNC, GOMP_LAUNCH_WAIT): New.
(GOMP_LAUNCH_CODE_SHIFT, GOMP_LAUNCH_DEVICE_SHIFT,
GOMP_LAUNCH_OP_SHIFT): New.
(GOMP_LAUNCH_PACK, GOMP_LAUNCH_CODE, GOMP_LAUNCH_DEVICE,
GOMP_LAUNCH_OP): New.
(GOMP_LAUNCH_OP_MAX): New.
2015-08-24 Nathan Sidwell <nathan@codesourcery.com> 2015-08-24 Nathan Sidwell <nathan@codesourcery.com>
* gomp-constants.h (GOMP_VERSION, GOMP_VERSION_NVIDIA_PTX, * gomp-constants.h (GOMP_VERSION, GOMP_VERSION_NVIDIA_PTX,

View File

@ -115,11 +115,33 @@ enum gomp_map_kind
/* Versions of libgomp and device-specific plugins. */ /* Versions of libgomp and device-specific plugins. */
#define GOMP_VERSION 0 #define GOMP_VERSION 0
#define GOMP_VERSION_NVIDIA_PTX 0 #define GOMP_VERSION_NVIDIA_PTX 1
#define GOMP_VERSION_INTEL_MIC 0 #define GOMP_VERSION_INTEL_MIC 0
#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV)) #define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV))
#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff) #define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff)
#define GOMP_VERSION_DEV(PACK) ((PACK) & 0xffff) #define GOMP_VERSION_DEV(PACK) ((PACK) & 0xffff)
/* Indices of the launch dimensions, and the number of them.  */
#define GOMP_DIM_GANG 0
#define GOMP_DIM_WORKER 1
#define GOMP_DIM_VECTOR 2
#define GOMP_DIM_MAX 3
/* Bit representing dimension X in a dimension mask.  */
#define GOMP_DIM_MASK(X) (1u << (X))

/* Variadic launch arguments.  End of list is marked by a zero.  */
#define GOMP_LAUNCH_DIM 1 /* Launch dimensions, op = mask */
#define GOMP_LAUNCH_ASYNC 2 /* Async, op = cst val if not MAX */
#define GOMP_LAUNCH_WAIT 3 /* Waits, op = num waits. */

/* Bit position of each field within a launch tag.  */
#define GOMP_LAUNCH_CODE_SHIFT 28
#define GOMP_LAUNCH_DEVICE_SHIFT 16
#define GOMP_LAUNCH_OP_SHIFT 0

/* Combine CODE, DEVICE and OP into a single launch tag.  Each operand
   is converted to unsigned before it is shifted, so that a CODE of 8
   or more (the code field is four bits wide) is never shifted into
   the sign bit of an int.  The operands are not masked; callers must
   keep each one within its field.  */
#define GOMP_LAUNCH_PACK(CODE,DEVICE,OP) \
  (((unsigned) (CODE) << GOMP_LAUNCH_CODE_SHIFT) \
   | ((unsigned) (DEVICE) << GOMP_LAUNCH_DEVICE_SHIFT) \
   | ((unsigned) (OP) << GOMP_LAUNCH_OP_SHIFT))

/* Extract the individual fields from launch tag X.  */
#define GOMP_LAUNCH_CODE(X) (((X) >> GOMP_LAUNCH_CODE_SHIFT) & 0xf)
#define GOMP_LAUNCH_DEVICE(X) (((X) >> GOMP_LAUNCH_DEVICE_SHIFT) & 0xfff)
#define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff)
/* Largest value the operand field can hold.  */
#define GOMP_LAUNCH_OP_MAX 0xffff
#endif #endif

View File

@ -1,3 +1,27 @@
2015-09-28 Nathan Sidwell <nathan@codesourcery.com>
* libgomp.h (acc_dispatch_t): Replace separate geometry args with
array.
* libgomp.map (GOACC_parallel_keyed): New.
* oacc-parallel.c (goacc_wait): Take pointer to va_list. Adjust
all callers.
(GOACC_parallel_keyed): New interface. Lose geometry arguments
and take keyed varargs list. Adjust call to exec_func.
(GOACC_parallel): Force host fallback.
* libgomp_g.h (GOACC_parallel): Remove.
(GOACC_parallel_keyed): Declare.
* plugin/plugin-nvptx.c (struct targ_fn_launch): New struct.
(struct targ_fn_descriptor): Replace name field with launch field.
(nvptx_exec): Lose separate geometry args, take array. Process
dynamic dimensions and adjust.
(struct nvptx_tdata): Replace fn_names field with fn_descs.
(GOMP_OFFLOAD_load_image): Adjust for change in function table
data.
(GOMP_OFFLOAD_openacc_parallel): Adjust for change in dimension
passing.
* oacc-host.c (host_openacc_exec): Adjust for change in dimension
passing.
2015-09-22 Chung-Lin Tang <cltang@codesourcery.com> 2015-09-22 Chung-Lin Tang <cltang@codesourcery.com>
PR libgomp/67141 PR libgomp/67141

View File

@ -695,7 +695,7 @@ typedef struct acc_dispatch_t
/* Execute. */ /* Execute. */
void (*exec_func) (void (*) (void *), size_t, void **, void **, size_t *, void (*exec_func) (void (*) (void *), size_t, void **, void **, size_t *,
unsigned short *, int, int, int, int, void *); unsigned short *, int, unsigned *, void *);
/* Async cleanup callback registration. */ /* Async cleanup callback registration. */
void (*register_async_cleanup_func) (void *); void (*register_async_cleanup_func) (void *);

View File

@ -332,6 +332,11 @@ GOACC_2.0 {
GOACC_get_num_threads; GOACC_get_num_threads;
}; };
GOACC_2.0.1 {
global:
GOACC_parallel_keyed;
} GOACC_2.0;
GOMP_PLUGIN_1.0 { GOMP_PLUGIN_1.0 {
global: global:
GOMP_PLUGIN_malloc; GOMP_PLUGIN_malloc;

View File

@ -222,9 +222,8 @@ extern void GOACC_data_start (int, size_t, void **, size_t *,
extern void GOACC_data_end (void); extern void GOACC_data_end (void);
extern void GOACC_enter_exit_data (int, size_t, void **, extern void GOACC_enter_exit_data (int, size_t, void **,
size_t *, unsigned short *, int, int, ...); size_t *, unsigned short *, int, int, ...);
extern void GOACC_parallel (int, void (*) (void *), size_t, extern void GOACC_parallel_keyd (int, void (*) (void *), size_t,
void **, size_t *, unsigned short *, int, int, int, void **, size_t *, unsigned short *, ...);
int, int, ...);
extern void GOACC_update (int, size_t, void **, size_t *, extern void GOACC_update (int, size_t, void **, size_t *,
unsigned short *, int, int, ...); unsigned short *, int, int, ...);
extern void GOACC_wait (int, int, ...); extern void GOACC_wait (int, int, ...);

View File

@ -137,10 +137,8 @@ host_openacc_exec (void (*fn) (void *),
void **devaddrs __attribute__ ((unused)), void **devaddrs __attribute__ ((unused)),
size_t *sizes __attribute__ ((unused)), size_t *sizes __attribute__ ((unused)),
unsigned short *kinds __attribute__ ((unused)), unsigned short *kinds __attribute__ ((unused)),
int num_gangs __attribute__ ((unused)),
int num_workers __attribute__ ((unused)),
int vector_length __attribute__ ((unused)),
int async __attribute__ ((unused)), int async __attribute__ ((unused)),
unsigned *dims __attribute ((unused)),
void *targ_mem_desc __attribute__ ((unused))) void *targ_mem_desc __attribute__ ((unused)))
{ {
fn (hostaddrs); fn (hostaddrs);

View File

@ -49,14 +49,18 @@ find_pset (int pos, size_t mapnum, unsigned short *kinds)
return kind == GOMP_MAP_TO_PSET; return kind == GOMP_MAP_TO_PSET;
} }
static void goacc_wait (int async, int num_waits, va_list ap); static void goacc_wait (int async, int num_waits, va_list *ap);
/* Launch a possibly offloaded function on DEVICE. FN is the host fn
address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
blocks to be copied to/from the device. Variadic arguments are
keyed optional parameters terminated with a zero. */
void void
GOACC_parallel (int device, void (*fn) (void *), GOACC_parallel_keyed (int device, void (*fn) (void *),
size_t mapnum, void **hostaddrs, size_t *sizes, size_t mapnum, void **hostaddrs, size_t *sizes,
unsigned short *kinds, unsigned short *kinds, ...)
int num_gangs, int num_workers, int vector_length,
int async, int num_waits, ...)
{ {
bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
va_list ap; va_list ap;
@ -68,22 +72,16 @@ GOACC_parallel (int device, void (*fn) (void *),
struct splay_tree_key_s k; struct splay_tree_key_s k;
splay_tree_key tgt_fn_key; splay_tree_key tgt_fn_key;
void (*tgt_fn); void (*tgt_fn);
int async = GOMP_ASYNC_SYNC;
if (num_gangs != 1) unsigned dims[GOMP_DIM_MAX];
gomp_fatal ("num_gangs (%d) different from one is not yet supported", unsigned tag;
num_gangs);
if (num_workers != 1)
gomp_fatal ("num_workers (%d) different from one is not yet supported",
num_workers);
#ifdef HAVE_INTTYPES_H #ifdef HAVE_INTTYPES_H
gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p, " gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
"async = %d\n", __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
__FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds, async);
#else #else
gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n", gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
__FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds, __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
async);
#endif #endif
goacc_lazy_initialize (); goacc_lazy_initialize ();
@ -105,12 +103,51 @@ GOACC_parallel (int device, void (*fn) (void *),
return; return;
} }
if (num_waits) va_start (ap, kinds);
/* TODO: This will need amending when device_type is implemented. */
while ((tag = va_arg (ap, unsigned)) != 0)
{ {
va_start (ap, num_waits); if (GOMP_LAUNCH_DEVICE (tag))
goacc_wait (async, num_waits, ap); gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
va_end (ap); GOMP_LAUNCH_DEVICE (tag));
switch (GOMP_LAUNCH_CODE (tag))
{
case GOMP_LAUNCH_DIM:
{
unsigned mask = GOMP_LAUNCH_OP (tag);
for (i = 0; i != GOMP_DIM_MAX; i++)
if (mask & GOMP_DIM_MASK (i))
dims[i] = va_arg (ap, unsigned);
}
break;
case GOMP_LAUNCH_ASYNC:
{
/* Small constant values are encoded in the operand. */
async = GOMP_LAUNCH_OP (tag);
if (async == GOMP_LAUNCH_OP_MAX)
async = va_arg (ap, unsigned);
break;
}
case GOMP_LAUNCH_WAIT:
{
unsigned num_waits = GOMP_LAUNCH_OP (tag);
if (num_waits)
goacc_wait (async, num_waits, &ap);
break;
}
default:
gomp_fatal ("unrecognized offload code '%d',"
" libgomp is too old", GOMP_LAUNCH_CODE (tag));
}
} }
va_end (ap);
acc_dev->openacc.async_set_async_func (async); acc_dev->openacc.async_set_async_func (async);
@ -138,9 +175,8 @@ GOACC_parallel (int device, void (*fn) (void *),
devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
+ tgt->list[i]->tgt_offset); + tgt->list[i]->tgt_offset);
acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds, acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes,
num_gangs, num_workers, vector_length, async, kinds, async, dims, tgt);
tgt);
/* If running synchronously, unmap immediately. */ /* If running synchronously, unmap immediately. */
if (async < acc_async_noval) if (async < acc_async_noval)
@ -154,6 +190,20 @@ GOACC_parallel (int device, void (*fn) (void *),
acc_dev->openacc.async_set_async_func (acc_async_sync); acc_dev->openacc.async_set_async_func (acc_async_sync);
} }
/* Legacy entry point, only provide host execution.

   FN is called on the host with HOSTADDRS as its single argument.
   The call is bracketed by goacc_save_and_set_bind (acc_device_host)
   and goacc_restore_bind ().

   DEVICE, MAPNUM, SIZES, KINDS, NUM_GANGS, NUM_WORKERS, VECTOR_LENGTH,
   ASYNC, NUM_WAITS and the variadic arguments keep the old signature
   intact but are not read here: no data is mapped and no wait
   arguments are processed.  */
void
GOACC_parallel (int device, void (*fn) (void *),
size_t mapnum, void **hostaddrs, size_t *sizes,
unsigned short *kinds,
int num_gangs, int num_workers, int vector_length,
int async, int num_waits, ...)
{
/* Bind to the host device for the duration of the call.  */
goacc_save_and_set_bind (acc_device_host);
fn (hostaddrs);
goacc_restore_bind ();
}
void void
GOACC_data_start (int device, size_t mapnum, GOACC_data_start (int device, size_t mapnum,
void **hostaddrs, size_t *sizes, unsigned short *kinds) void **hostaddrs, size_t *sizes, unsigned short *kinds)
@ -230,7 +280,7 @@ GOACC_enter_exit_data (int device, size_t mapnum,
va_list ap; va_list ap;
va_start (ap, num_waits); va_start (ap, num_waits);
goacc_wait (async, num_waits, ap); goacc_wait (async, num_waits, &ap);
va_end (ap); va_end (ap);
} }
@ -344,15 +394,15 @@ GOACC_enter_exit_data (int device, size_t mapnum,
} }
static void static void
goacc_wait (int async, int num_waits, va_list ap) goacc_wait (int async, int num_waits, va_list *ap)
{ {
struct goacc_thread *thr = goacc_thread (); struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev; struct gomp_device_descr *acc_dev = thr->dev;
while (num_waits--) while (num_waits--)
{ {
int qid = va_arg (ap, int); int qid = va_arg (*ap, int);
if (acc_async_test (qid)) if (acc_async_test (qid))
continue; continue;
@ -389,7 +439,7 @@ GOACC_update (int device, size_t mapnum,
va_list ap; va_list ap;
va_start (ap, num_waits); va_start (ap, num_waits);
goacc_wait (async, num_waits, ap); goacc_wait (async, num_waits, &ap);
va_end (ap); va_end (ap);
} }
@ -430,7 +480,7 @@ GOACC_wait (int async, int num_waits, ...)
va_list ap; va_list ap;
va_start (ap, num_waits); va_start (ap, num_waits);
goacc_wait (async, num_waits, ap); goacc_wait (async, num_waits, &ap);
va_end (ap); va_end (ap);
} }
else if (async == acc_async_sync) else if (async == acc_async_sync)

View File

@ -282,12 +282,20 @@ map_push (struct ptx_stream *s, int async, size_t size, void **h, void **d)
return; return;
} }
/* Target data function launch information.  One entry per offloaded
   function, read from the image's function table (fn_descs).
   NOTE(review): the layout presumably has to match what mkoffload
   emits -- confirm before changing any field.  */
struct targ_fn_launch
{
/* Name of the function; handed to cuModuleGetFunction when the image
   is loaded and printed in the kernel launch debug output.  */
const char *fn;
/* Launch dimensions.  At launch, a non-zero dim[i] replaces the
   caller-supplied dims[i]; a zero entry leaves the caller's value.
   NOTE(review): the launch code indexes the result with
   GOMP_DIM_GANG / GOMP_DIM_WORKER / GOMP_DIM_VECTOR, so the 3 here
   presumably corresponds to GOMP_DIM_MAX -- confirm.  */
unsigned short dim[3];
};
/* Descriptor of a loaded function. */ /* Descriptor of a loaded function. */
struct targ_fn_descriptor struct targ_fn_descriptor
{ {
CUfunction fn; CUfunction fn;
const char *name; const struct targ_fn_launch *launch;
}; };
/* A loaded PTX image. */ /* A loaded PTX image. */
@ -929,8 +937,8 @@ event_add (enum ptx_event_type type, CUevent *e, void *h)
void void
nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
size_t *sizes, unsigned short *kinds, int num_gangs, int num_workers, size_t *sizes, unsigned short *kinds, int async, unsigned *dims,
int vector_length, int async, void *targ_mem_desc) void *targ_mem_desc)
{ {
struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn; struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn;
CUfunction function; CUfunction function;
@ -939,7 +947,6 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
struct ptx_stream *dev_str; struct ptx_stream *dev_str;
void *kargs[1]; void *kargs[1];
void *hp, *dp; void *hp, *dp;
unsigned int nthreads_in_block;
struct nvptx_thread *nvthd = nvptx_thread (); struct nvptx_thread *nvthd = nvptx_thread ();
const char *maybe_abort_msg = "(perhaps abort was called)"; const char *maybe_abort_msg = "(perhaps abort was called)";
@ -948,6 +955,20 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
dev_str = select_stream_for_async (async, pthread_self (), false, NULL); dev_str = select_stream_for_async (async, pthread_self (), false, NULL);
assert (dev_str == nvthd->current_stream); assert (dev_str == nvthd->current_stream);
/* Initialize the launch dimensions. Typically this is constant,
provided by the device compiler, but we must permit runtime
values. */
for (i = 0; i != 3; i++)
if (targ_fn->launch->dim[i])
dims[i] = targ_fn->launch->dim[i];
if (dims[GOMP_DIM_GANG] != 1)
GOMP_PLUGIN_fatal ("non-unity num_gangs (%d) not supported",
dims[GOMP_DIM_GANG]);
if (dims[GOMP_DIM_WORKER] != 1)
GOMP_PLUGIN_fatal ("non-unity num_workers (%d) not supported",
dims[GOMP_DIM_WORKER]);
/* This reserves a chunk of a pre-allocated page of memory mapped on both /* This reserves a chunk of a pre-allocated page of memory mapped on both
the host and the device. HP is a host pointer to the new chunk, and DP is the host and the device. HP is a host pointer to the new chunk, and DP is
the corresponding device pointer. */ the corresponding device pointer. */
@ -965,35 +986,21 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
if (r != CUDA_SUCCESS) if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("cuMemcpy failed: %s", cuda_error (r)); GOMP_PLUGIN_fatal ("cuMemcpy failed: %s", cuda_error (r));
GOMP_PLUGIN_debug (0, " %s: kernel %s: launch\n", __FUNCTION__, targ_fn->name); GOMP_PLUGIN_debug (0, " %s: kernel %s: launch"
" gangs=%u, workers=%u, vectors=%u\n",
__FUNCTION__, targ_fn->launch->fn,
dims[0], dims[1], dims[2]);
// OpenACC CUDA // OpenACC CUDA
// //
// num_gangs blocks // num_gangs nctaid.x
// num_workers warps (where a warp is equivalent to 32 threads) // num_workers ntid.y
// vector length threads // vector length ntid.x
//
/* The openacc vector_length clause 'determines the vector length to use for
vector or SIMD operations'. The question is how to map this to CUDA.
In CUDA, the warp size is the vector length of a CUDA device. However, the
CUDA interface abstracts away from that, and only shows us warp size
indirectly in maximum number of threads per block, which is a product of
warp size and the number of hyperthreads of a multiprocessor.
We choose to map openacc vector_length directly onto the number of threads
in a block, in the x dimension. This is reflected in gcc code generation
that uses ThreadIdx.x to access vector elements.
Attempting to use an openacc vector_length of more than the maximum number
of threads per block will result in a cuda error. */
nthreads_in_block = vector_length;
kargs[0] = &dp; kargs[0] = &dp;
r = cuLaunchKernel (function, r = cuLaunchKernel (function,
num_gangs, 1, 1, dims[GOMP_DIM_GANG], 1, 1,
nthreads_in_block, 1, 1, dims[GOMP_DIM_VECTOR], dims[GOMP_DIM_WORKER], 1,
0, dev_str->stream, kargs, 0); 0, dev_str->stream, kargs, 0);
if (r != CUDA_SUCCESS) if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("cuLaunchKernel error: %s", cuda_error (r)); GOMP_PLUGIN_fatal ("cuLaunchKernel error: %s", cuda_error (r));
@ -1039,7 +1046,7 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
#endif #endif
GOMP_PLUGIN_debug (0, " %s: kernel %s: finished\n", __FUNCTION__, GOMP_PLUGIN_debug (0, " %s: kernel %s: finished\n", __FUNCTION__,
targ_fn->name); targ_fn->launch->fn);
#ifndef DISABLE_ASYNC #ifndef DISABLE_ASYNC
if (async < acc_async_noval) if (async < acc_async_noval)
@ -1567,7 +1574,7 @@ typedef struct nvptx_tdata
const char *const *var_names; const char *const *var_names;
size_t var_num; size_t var_num;
const char *const *fn_names; const struct targ_fn_launch *fn_descs;
size_t fn_num; size_t fn_num;
} nvptx_tdata_t; } nvptx_tdata_t;
@ -1588,7 +1595,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
struct addr_pair **target_table) struct addr_pair **target_table)
{ {
CUmodule module; CUmodule module;
const char *const *fn_names, *const *var_names; const char *const *var_names;
const struct targ_fn_launch *fn_descs;
unsigned int fn_entries, var_entries, i, j; unsigned int fn_entries, var_entries, i, j;
CUresult r; CUresult r;
struct targ_fn_descriptor *targ_fns; struct targ_fn_descriptor *targ_fns;
@ -1617,7 +1625,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
var_entries = img_header->var_num; var_entries = img_header->var_num;
var_names = img_header->var_names; var_names = img_header->var_names;
fn_entries = img_header->fn_num; fn_entries = img_header->fn_num;
fn_names = img_header->fn_names; fn_descs = img_header->fn_descs;
targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair) targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
* (fn_entries + var_entries)); * (fn_entries + var_entries));
@ -1640,12 +1648,12 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
{ {
CUfunction function; CUfunction function;
r = cuModuleGetFunction (&function, module, fn_names[i]); r = cuModuleGetFunction (&function, module, fn_descs[i].fn);
if (r != CUDA_SUCCESS) if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("cuModuleGetFunction error: %s", cuda_error (r)); GOMP_PLUGIN_fatal ("cuModuleGetFunction error: %s", cuda_error (r));
targ_fns->fn = function; targ_fns->fn = function;
targ_fns->name = (const char *) fn_names[i]; targ_fns->launch = &fn_descs[i];
targ_tbl->start = (uintptr_t) targ_fns; targ_tbl->start = (uintptr_t) targ_fns;
targ_tbl->end = targ_tbl->start + 1; targ_tbl->end = targ_tbl->start + 1;
@ -1724,13 +1732,12 @@ void (*device_run) (int n, void *fn_ptr, void *vars) = NULL;
void void
GOMP_OFFLOAD_openacc_parallel (void (*fn) (void *), size_t mapnum, GOMP_OFFLOAD_openacc_parallel (void (*fn) (void *), size_t mapnum,
void **hostaddrs, void **devaddrs, size_t *sizes, void **hostaddrs, void **devaddrs,
unsigned short *kinds, int num_gangs, size_t *sizes, unsigned short *kinds,
int num_workers, int vector_length, int async, int async, unsigned *dims, void *targ_mem_desc)
void *targ_mem_desc)
{ {
nvptx_exec (fn, mapnum, hostaddrs, devaddrs, sizes, kinds, num_gangs, nvptx_exec (fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
num_workers, vector_length, async, targ_mem_desc); async, dims, targ_mem_desc);
} }
void void