gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
include/ * gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment. (GOMP_DIM_GANG, GOMP_DIM_WORKER, GOMP_DIM_VECTOR, GOMP_DIM_MAX, GOMP_DIM_MASK): New. (GOMP_LAUNCH_DIM, GOMP_LAUNCH_ASYNC, GOMP_LAUNCH_WAIT): New. (GOMP_LAUNCH_CODE_SHIFT, GOMP_LAUNCH_DEVICE_SHIFT, GOMP_LAUNCH_OP_SHIFT): New. (GOMP_LAUNCH_PACK, GOMP_LAUNCH_CODE, GOMP_LAUNCH_DEVICE, GOMP_LAUNCH_OP): New. (GOMP_LAUNCH_OP_MAX): New. libgomp/ * libgomp.h (acc_dispatch_t): Replace separate geometry args with array. * libgomp.map (GOACC_parallel_keyed): New. * oacc-parallel.c (goacc_wait): Take pointer to va_list. Adjust all callers. (GOACC_parallel_keyed): New interface. Lose geometry arguments and take keyed varargs list. Adjust call to exec_func. (GOACC_parallel): Force host fallback. * libgomp_g.h (GOACC_parallel): Remove. (GOACC_parallel_keyed): Declare. * plugin/plugin-nvptx.c (struct targ_fn_launch): New struct. (struct targ_fn_descriptor): Replace name field with launch field. (nvptx_exec): Lose separate geometry args, take array. Process dynamic dimensions and adjust. (struct nvptx_tdata): Replace fn_names field with fn_descs. (GOMP_OFFLOAD_load_image): Adjust for change in function table data. (GOMP_OFFLOAD_openacc_parallel): Adjust for change in dimension passing. * oacc-host.c (host_openacc_exec): Adjust for change in dimension passing. gcc/ * config/nvptx/nvptx.c: Include omp-low.h and gomp-constants.h. (nvptx_record_offload_symbol): Record function execution geometry. * config/nvptx/mkoffload.c (process): Include launch geometry in function data. * omp-low.c (oacc_launch_pack): New. (replace_oacc_fn_attrib): New. (set_oacc_fn_attrib): New. (get_oacc_fn_attrib): New. (expand_omp_target): Create keyed varargs for GOACC_parallel call generation. * omp-low.h (get_oacc_fn_attrib): Declare. * builtin-types.def (DEF_FUNCTION_TYPE_VAR_6): New. (DEF_FUNCTION_TYPE_VAR_11): Delete. * tree.h (OMP_CLAUSE_EXPR): New. * omp-builtins.def (BUILT_IN_GOACC_PARALLEL): Change target fn name. 
gcc/lto/ * lto-lang.c (DEF_FUNCTION_TYPE_VAR_6): New. (DEF_FUNCTION_TYPE_VAR_11): Delete. gcc/c-family/ * c-common.c (DEF_FUNCTION_TYPE_VAR_6): New. (DEF_FUNCTION_TYPE_VAR_11): Delete. gcc/fortran/ * f95-lang.c (DEF_FUNCTION_TYPE_VAR_6): New. (DEF_FUNCTION_TYPE_VAR_11): Delete. * types.def (DEF_FUNCTION_TYPE_VAR_6): New. (DEF_FUNCTION_TYPE_VAR_11): Delete. gcc/ada/ * gcc-interface/utils.c (DEF_FUNCTION_TYPE_VAR_6): Define From-SVN: r228220
This commit is contained in:
parent
4e671509d9
commit
3e32ee19a5
@ -1,3 +1,21 @@
|
||||
2015-09-28 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* config/nvptx/nvptx.c: Include omp-low.h and gomp-constants.h.
|
||||
(nvptx_record_offload_symbol): Record function execution geometry.
|
||||
* config/nvptx/mkoffload.c (process): Include launch geometry in
|
||||
function data.
|
||||
* omp-low.c (oacc_launch_pack): New.
|
||||
(replace_oacc_fn_attrib): New.
|
||||
(set_oacc_fn_attrib): New.
|
||||
(get_oacc_fn_attrib): New.
|
||||
(expand_omp_target): Create keyed varargs for GOACC_parallel call
|
||||
generation.
|
||||
* omp-low.h (get_oacc_fn_attrib): Declare.
|
||||
* builtin-types.def (DEF_FUNCTION_TYPE_VAR_6): New.
|
||||
(DEF_FUNCTION_TYPE_VAR_11): Delete.
|
||||
* tree.h (OMP_CLAUSE_EXPR): New.
|
||||
* omp-builtins.def (BUILT_IN_GOACC_PARALLEL): Change target fn name.
|
||||
|
||||
2015-09-28 Aditya Kumar <aditya.k7@samsung.com>
|
||||
Sebastian Pop <s.pop@samsung.com>
|
||||
|
||||
|
@ -1,3 +1,7 @@
|
||||
2015-09-28 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* gcc-interface/utils.c (DEF_FUNCTION_TYPE_VAR_6): Define.
|
||||
|
||||
2015-09-17 Eric Botcazou <ebotcazou@adacore.com>
|
||||
|
||||
* gcc-interface/trans.c (emit_check): Do not touch TREE_SIDE_EFFECTS.
|
||||
|
@ -5376,6 +5376,8 @@ enum c_builtin_type
|
||||
#define DEF_FUNCTION_TYPE_VAR_4(NAME, RETURN, ARG1, ARG2, ARG3, ARG4) NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_5(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \
|
||||
NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_6(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6) NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_7(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6, ARG7) NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_11(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
@ -5398,6 +5400,7 @@ enum c_builtin_type
|
||||
#undef DEF_FUNCTION_TYPE_VAR_3
|
||||
#undef DEF_FUNCTION_TYPE_VAR_4
|
||||
#undef DEF_FUNCTION_TYPE_VAR_5
|
||||
#undef DEF_FUNCTION_TYPE_VAR_6
|
||||
#undef DEF_FUNCTION_TYPE_VAR_7
|
||||
#undef DEF_FUNCTION_TYPE_VAR_11
|
||||
#undef DEF_POINTER_TYPE
|
||||
@ -5505,6 +5508,9 @@ install_builtin_function_types (void)
|
||||
def_fn_type (ENUM, RETURN, 1, 4, ARG1, ARG2, ARG3, ARG4);
|
||||
#define DEF_FUNCTION_TYPE_VAR_5(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \
|
||||
def_fn_type (ENUM, RETURN, 1, 5, ARG1, ARG2, ARG3, ARG4, ARG5);
|
||||
#define DEF_FUNCTION_TYPE_VAR_6(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6) \
|
||||
def_fn_type (ENUM, RETURN, 1, 6, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6);
|
||||
#define DEF_FUNCTION_TYPE_VAR_7(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6, ARG7) \
|
||||
def_fn_type (ENUM, RETURN, 1, 7, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7);
|
||||
@ -5533,6 +5539,7 @@ install_builtin_function_types (void)
|
||||
#undef DEF_FUNCTION_TYPE_VAR_3
|
||||
#undef DEF_FUNCTION_TYPE_VAR_4
|
||||
#undef DEF_FUNCTION_TYPE_VAR_5
|
||||
#undef DEF_FUNCTION_TYPE_VAR_6
|
||||
#undef DEF_FUNCTION_TYPE_VAR_7
|
||||
#undef DEF_FUNCTION_TYPE_VAR_11
|
||||
#undef DEF_POINTER_TYPE
|
||||
|
@ -590,15 +590,14 @@ DEF_FUNCTION_TYPE_VAR_5 (BT_FN_INT_STRING_SIZE_INT_SIZE_CONST_STRING_VAR,
|
||||
DEF_FUNCTION_TYPE_VAR_5 (BT_FN_INT_INT_INT_INT_INT_INT_VAR,
|
||||
BT_INT, BT_INT, BT_INT, BT_INT, BT_INT, BT_INT)
|
||||
|
||||
DEF_FUNCTION_TYPE_VAR_6 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
|
||||
BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
|
||||
BT_PTR, BT_PTR, BT_PTR)
|
||||
|
||||
DEF_FUNCTION_TYPE_VAR_7 (BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
|
||||
BT_VOID, BT_INT, BT_SIZE, BT_PTR, BT_PTR,
|
||||
BT_PTR, BT_INT, BT_INT)
|
||||
|
||||
DEF_FUNCTION_TYPE_VAR_11 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR,
|
||||
BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
|
||||
BT_PTR, BT_PTR, BT_PTR, BT_INT, BT_INT, BT_INT,
|
||||
BT_INT, BT_INT)
|
||||
|
||||
DEF_POINTER_TYPE (BT_PTR_FN_VOID_VAR, BT_FN_VOID_VAR)
|
||||
DEF_FUNCTION_TYPE_3 (BT_FN_PTR_PTR_FN_VOID_VAR_PTR_SIZE,
|
||||
BT_PTR, BT_PTR_FN_VOID_VAR, BT_PTR, BT_SIZE)
|
||||
|
@ -1,3 +1,8 @@
|
||||
2015-09-28 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* c-common.c (DEF_FUNCTION_TYPE_VAR_6): New.
|
||||
(DEF_FUNCTION_TYPE_VAR_11): Delete.
|
||||
|
||||
2015-09-25 Marek Polacek <polacek@redhat.com>
|
||||
|
||||
* c-ubsan.c (ubsan_instrument_division): Remove unnecessary code.
|
||||
|
@ -5548,10 +5548,10 @@ enum c_builtin_type
|
||||
#define DEF_FUNCTION_TYPE_VAR_4(NAME, RETURN, ARG1, ARG2, ARG3, ARG4) NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_5(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \
|
||||
NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_6(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6) NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_7(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6, ARG7) NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_11(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) NAME,
|
||||
#define DEF_POINTER_TYPE(NAME, TYPE) NAME,
|
||||
#include "builtin-types.def"
|
||||
#undef DEF_PRIMITIVE_TYPE
|
||||
@ -5570,8 +5570,8 @@ enum c_builtin_type
|
||||
#undef DEF_FUNCTION_TYPE_VAR_3
|
||||
#undef DEF_FUNCTION_TYPE_VAR_4
|
||||
#undef DEF_FUNCTION_TYPE_VAR_5
|
||||
#undef DEF_FUNCTION_TYPE_VAR_6
|
||||
#undef DEF_FUNCTION_TYPE_VAR_7
|
||||
#undef DEF_FUNCTION_TYPE_VAR_11
|
||||
#undef DEF_POINTER_TYPE
|
||||
BT_LAST
|
||||
};
|
||||
@ -5664,13 +5664,12 @@ c_define_builtins (tree va_list_ref_type_node, tree va_list_arg_type_node)
|
||||
def_fn_type (ENUM, RETURN, 1, 4, ARG1, ARG2, ARG3, ARG4);
|
||||
#define DEF_FUNCTION_TYPE_VAR_5(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \
|
||||
def_fn_type (ENUM, RETURN, 1, 5, ARG1, ARG2, ARG3, ARG4, ARG5);
|
||||
#define DEF_FUNCTION_TYPE_VAR_6(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6) \
|
||||
def_fn_type (ENUM, RETURN, 1, 6, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6);
|
||||
#define DEF_FUNCTION_TYPE_VAR_7(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6, ARG7) \
|
||||
def_fn_type (ENUM, RETURN, 1, 7, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7);
|
||||
#define DEF_FUNCTION_TYPE_VAR_11(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) \
|
||||
def_fn_type (ENUM, RETURN, 1, 11, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, \
|
||||
ARG7, ARG8, ARG9, ARG10, ARG11);
|
||||
#define DEF_POINTER_TYPE(ENUM, TYPE) \
|
||||
builtin_types[(int) ENUM] = build_pointer_type (builtin_types[(int) TYPE]);
|
||||
|
||||
@ -5692,8 +5691,8 @@ c_define_builtins (tree va_list_ref_type_node, tree va_list_arg_type_node)
|
||||
#undef DEF_FUNCTION_TYPE_VAR_3
|
||||
#undef DEF_FUNCTION_TYPE_VAR_4
|
||||
#undef DEF_FUNCTION_TYPE_VAR_5
|
||||
#undef DEF_FUNCTION_TYPE_VAR_6
|
||||
#undef DEF_FUNCTION_TYPE_VAR_7
|
||||
#undef DEF_FUNCTION_TYPE_VAR_11
|
||||
#undef DEF_POINTER_TYPE
|
||||
builtin_types[(int) BT_LAST] = NULL_TREE;
|
||||
|
||||
|
@ -842,6 +842,8 @@ process (FILE *in, FILE *out)
|
||||
{
|
||||
const char *input = read_file (in);
|
||||
Token *tok = tokenize (input);
|
||||
const char *comma;
|
||||
id_map const *id;
|
||||
|
||||
do
|
||||
tok = parse_file (tok);
|
||||
@ -853,21 +855,25 @@ process (FILE *in, FILE *out)
|
||||
write_stmts (out, rev_stmts (fns));
|
||||
fprintf (out, ";\n\n");
|
||||
|
||||
fprintf (out, "static const char *const var_mappings[] = {\n");
|
||||
for (id_map *id = var_ids; id; id = id->next)
|
||||
fprintf (out, "\t\"%s\"%s\n", id->ptx_name, id->next ? "," : "");
|
||||
fprintf (out, "};\n\n");
|
||||
fprintf (out, "static const char *const func_mappings[] = {\n");
|
||||
for (id_map *id = func_ids; id; id = id->next)
|
||||
fprintf (out, "\t\"%s\"%s\n", id->ptx_name, id->next ? "," : "");
|
||||
fprintf (out, "};\n\n");
|
||||
fprintf (out, "static const char *const var_mappings[] = {");
|
||||
for (comma = "", id = var_ids; id; comma = ",", id = id->next)
|
||||
fprintf (out, "%s\n\t%s", comma, id->ptx_name);
|
||||
fprintf (out, "\n};\n\n");
|
||||
|
||||
fprintf (out, "static const struct nvptx_fn {\n"
|
||||
" const char *name;\n"
|
||||
" unsigned short dim[3];\n"
|
||||
"} func_mappings[] = {\n");
|
||||
for (comma = "", id = func_ids; id; comma = ",", id = id->next)
|
||||
fprintf (out, "%s\n\t{%s}", comma, id->ptx_name);
|
||||
fprintf (out, "\n};\n\n");
|
||||
|
||||
fprintf (out,
|
||||
"static const struct nvptx_tdata {\n"
|
||||
" const char *ptx_src;\n"
|
||||
" const char *const *var_names;\n"
|
||||
" __SIZE_TYPE__ var_num;\n"
|
||||
" const char *const *fn_names;\n"
|
||||
" const struct nvptx_fn *fn_names;\n"
|
||||
" __SIZE_TYPE__ fn_num;\n"
|
||||
"} target_data = {\n"
|
||||
" ptx_code,\n"
|
||||
|
@ -56,6 +56,8 @@
|
||||
#include "cfgrtl.h"
|
||||
#include "stor-layout.h"
|
||||
#include "builtins.h"
|
||||
#include "omp-low.h"
|
||||
#include "gomp-constants.h"
|
||||
|
||||
/* This file should be included last. */
|
||||
#include "target-def.h"
|
||||
@ -2066,9 +2068,51 @@ nvptx_vector_alignment (const_tree type)
|
||||
static void
|
||||
nvptx_record_offload_symbol (tree decl)
|
||||
{
|
||||
fprintf (asm_out_file, "//:%s_MAP %s\n",
|
||||
TREE_CODE (decl) == VAR_DECL ? "VAR" : "FUNC",
|
||||
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
|
||||
switch (TREE_CODE (decl))
|
||||
{
|
||||
case VAR_DECL:
|
||||
fprintf (asm_out_file, "//:VAR_MAP \"%s\"\n",
|
||||
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
|
||||
break;
|
||||
|
||||
case FUNCTION_DECL:
|
||||
{
|
||||
tree attr = get_oacc_fn_attrib (decl);
|
||||
tree dims = NULL_TREE;
|
||||
unsigned ix;
|
||||
|
||||
if (attr)
|
||||
dims = TREE_VALUE (attr);
|
||||
fprintf (asm_out_file, "//:FUNC_MAP \"%s\"",
|
||||
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
|
||||
|
||||
for (ix = 0; ix != GOMP_DIM_MAX; ix++)
|
||||
{
|
||||
int size = 1;
|
||||
|
||||
/* TODO: This check can go away once the dimension default
|
||||
machinery is merged to trunk. */
|
||||
if (dims)
|
||||
{
|
||||
tree dim = TREE_VALUE (dims);
|
||||
|
||||
if (dim)
|
||||
size = TREE_INT_CST_LOW (dim);
|
||||
|
||||
gcc_assert (!TREE_PURPOSE (dims));
|
||||
dims = TREE_CHAIN (dims);
|
||||
}
|
||||
|
||||
fprintf (asm_out_file, ", %#x", size);
|
||||
}
|
||||
|
||||
fprintf (asm_out_file, "\n");
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Implement TARGET_ASM_FILE_START. Write the kinds of things ptxas expects
|
||||
|
@ -1,3 +1,10 @@
|
||||
2015-09-28 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* f95-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
|
||||
(DEF_FUNCTION_TYPE_VAR_11): Delete.
|
||||
* types.def (DEF_FUNCTION_TYPE_VAR_6): New.
|
||||
(DEF_FUNCTION_TYPE_VAR_11): Delete.
|
||||
|
||||
2015-09-26 Mikael Morin <mikael@gcc.gnu.org>
|
||||
|
||||
PR fortran/67721
|
||||
|
@ -635,10 +635,10 @@ gfc_init_builtin_functions (void)
|
||||
ARG6, ARG7, ARG8) NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_0(NAME, RETURN) NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_2(NAME, RETURN, ARG1, ARG2) NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_6(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6) NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_7(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6, ARG7) NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_11(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) NAME,
|
||||
#define DEF_POINTER_TYPE(NAME, TYPE) NAME,
|
||||
#include "types.def"
|
||||
#undef DEF_PRIMITIVE_TYPE
|
||||
@ -653,8 +653,8 @@ gfc_init_builtin_functions (void)
|
||||
#undef DEF_FUNCTION_TYPE_8
|
||||
#undef DEF_FUNCTION_TYPE_VAR_0
|
||||
#undef DEF_FUNCTION_TYPE_VAR_2
|
||||
#undef DEF_FUNCTION_TYPE_VAR_6
|
||||
#undef DEF_FUNCTION_TYPE_VAR_7
|
||||
#undef DEF_FUNCTION_TYPE_VAR_11
|
||||
#undef DEF_POINTER_TYPE
|
||||
BT_LAST
|
||||
};
|
||||
@ -1096,6 +1096,17 @@ gfc_init_builtin_functions (void)
|
||||
builtin_types[(int) ARG1], \
|
||||
builtin_types[(int) ARG2], \
|
||||
NULL_TREE);
|
||||
#define DEF_FUNCTION_TYPE_VAR_6(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6) \
|
||||
builtin_types[(int) ENUM] \
|
||||
= build_varargs_function_type_list (builtin_types[(int) RETURN], \
|
||||
builtin_types[(int) ARG1], \
|
||||
builtin_types[(int) ARG2], \
|
||||
builtin_types[(int) ARG3], \
|
||||
builtin_types[(int) ARG4], \
|
||||
builtin_types[(int) ARG5], \
|
||||
builtin_types[(int) ARG6], \
|
||||
NULL_TREE);
|
||||
#define DEF_FUNCTION_TYPE_VAR_7(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6, ARG7) \
|
||||
builtin_types[(int) ENUM] \
|
||||
@ -1108,22 +1119,6 @@ gfc_init_builtin_functions (void)
|
||||
builtin_types[(int) ARG6], \
|
||||
builtin_types[(int) ARG7], \
|
||||
NULL_TREE);
|
||||
#define DEF_FUNCTION_TYPE_VAR_11(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) \
|
||||
builtin_types[(int) ENUM] \
|
||||
= build_varargs_function_type_list (builtin_types[(int) RETURN], \
|
||||
builtin_types[(int) ARG1], \
|
||||
builtin_types[(int) ARG2], \
|
||||
builtin_types[(int) ARG3], \
|
||||
builtin_types[(int) ARG4], \
|
||||
builtin_types[(int) ARG5], \
|
||||
builtin_types[(int) ARG6], \
|
||||
builtin_types[(int) ARG7], \
|
||||
builtin_types[(int) ARG8], \
|
||||
builtin_types[(int) ARG9], \
|
||||
builtin_types[(int) ARG10], \
|
||||
builtin_types[(int) ARG11], \
|
||||
NULL_TREE);
|
||||
#define DEF_POINTER_TYPE(ENUM, TYPE) \
|
||||
builtin_types[(int) ENUM] \
|
||||
= build_pointer_type (builtin_types[(int) TYPE]);
|
||||
@ -1140,8 +1135,8 @@ gfc_init_builtin_functions (void)
|
||||
#undef DEF_FUNCTION_TYPE_8
|
||||
#undef DEF_FUNCTION_TYPE_VAR_0
|
||||
#undef DEF_FUNCTION_TYPE_VAR_2
|
||||
#undef DEF_FUNCTION_TYPE_VAR_6
|
||||
#undef DEF_FUNCTION_TYPE_VAR_7
|
||||
#undef DEF_FUNCTION_TYPE_VAR_11
|
||||
#undef DEF_POINTER_TYPE
|
||||
builtin_types[(int) BT_LAST] = NULL_TREE;
|
||||
|
||||
|
@ -219,7 +219,6 @@ DEF_FUNCTION_TYPE_VAR_7 (BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
|
||||
BT_VOID, BT_INT, BT_SIZE, BT_PTR, BT_PTR,
|
||||
BT_PTR, BT_INT, BT_INT)
|
||||
|
||||
DEF_FUNCTION_TYPE_VAR_11 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR,
|
||||
DEF_FUNCTION_TYPE_VAR_6 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
|
||||
BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE,
|
||||
BT_PTR, BT_PTR, BT_PTR, BT_INT, BT_INT, BT_INT,
|
||||
BT_INT, BT_INT)
|
||||
BT_PTR, BT_PTR, BT_PTR)
|
||||
|
@ -1,3 +1,8 @@
|
||||
2015-09-28 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* lto-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
|
||||
(DEF_FUNCTION_TYPE_VAR_11): Delete.
|
||||
|
||||
2015-08-31 Richard Biener <rguenther@suse.de>
|
||||
|
||||
* lto.c (compare_tree_sccs_1): Compare DECL_ABSTRACT_ORIGIN.
|
||||
|
@ -160,10 +160,10 @@ enum lto_builtin_type
|
||||
#define DEF_FUNCTION_TYPE_VAR_4(NAME, RETURN, ARG1, ARG2, ARG3, ARG4) NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_5(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG6) \
|
||||
NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_6(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6) NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_7(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6, ARG7) NAME,
|
||||
#define DEF_FUNCTION_TYPE_VAR_11(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) NAME,
|
||||
#define DEF_POINTER_TYPE(NAME, TYPE) NAME,
|
||||
#include "builtin-types.def"
|
||||
#undef DEF_PRIMITIVE_TYPE
|
||||
@ -182,8 +182,8 @@ enum lto_builtin_type
|
||||
#undef DEF_FUNCTION_TYPE_VAR_3
|
||||
#undef DEF_FUNCTION_TYPE_VAR_4
|
||||
#undef DEF_FUNCTION_TYPE_VAR_5
|
||||
#undef DEF_FUNCTION_TYPE_VAR_6
|
||||
#undef DEF_FUNCTION_TYPE_VAR_7
|
||||
#undef DEF_FUNCTION_TYPE_VAR_11
|
||||
#undef DEF_POINTER_TYPE
|
||||
BT_LAST
|
||||
};
|
||||
@ -668,13 +668,12 @@ lto_define_builtins (tree va_list_ref_type_node ATTRIBUTE_UNUSED,
|
||||
def_fn_type (ENUM, RETURN, 1, 4, ARG1, ARG2, ARG3, ARG4);
|
||||
#define DEF_FUNCTION_TYPE_VAR_5(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5) \
|
||||
def_fn_type (ENUM, RETURN, 1, 5, ARG1, ARG2, ARG3, ARG4, ARG5);
|
||||
#define DEF_FUNCTION_TYPE_VAR_6(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6) \
|
||||
def_fn_type (ENUM, RETURN, 1, 6, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6);
|
||||
#define DEF_FUNCTION_TYPE_VAR_7(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6, ARG7) \
|
||||
def_fn_type (ENUM, RETURN, 1, 7, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, ARG7);
|
||||
#define DEF_FUNCTION_TYPE_VAR_11(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
|
||||
ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) \
|
||||
def_fn_type (ENUM, RETURN, 1, 11, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, \
|
||||
ARG7, ARG8, ARG9, ARG10, ARG11);
|
||||
#define DEF_POINTER_TYPE(ENUM, TYPE) \
|
||||
builtin_types[(int) ENUM] = build_pointer_type (builtin_types[(int) TYPE]);
|
||||
|
||||
@ -696,8 +695,8 @@ lto_define_builtins (tree va_list_ref_type_node ATTRIBUTE_UNUSED,
|
||||
#undef DEF_FUNCTION_TYPE_VAR_3
|
||||
#undef DEF_FUNCTION_TYPE_VAR_4
|
||||
#undef DEF_FUNCTION_TYPE_VAR_5
|
||||
#undef DEF_FUNCTION_TYPE_VAR_6
|
||||
#undef DEF_FUNCTION_TYPE_VAR_7
|
||||
#undef DEF_FUNCTION_TYPE_VAR_11
|
||||
#undef DEF_POINTER_TYPE
|
||||
builtin_types[(int) BT_LAST] = NULL_TREE;
|
||||
|
||||
|
@ -38,8 +38,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DATA_END, "GOACC_data_end",
|
||||
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ENTER_EXIT_DATA, "GOACC_enter_exit_data",
|
||||
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
|
||||
ATTR_NOTHROW_LIST)
|
||||
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel",
|
||||
BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR,
|
||||
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel_keyed",
|
||||
BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
|
||||
ATTR_NOTHROW_LIST)
|
||||
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update",
|
||||
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
|
||||
|
229
gcc/omp-low.c
229
gcc/omp-low.c
@ -82,7 +82,6 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "lto-section-names.h"
|
||||
#include "gomp-constants.h"
|
||||
|
||||
|
||||
/* Lowering of OMP parallel and workshare constructs proceeds in two
|
||||
phases. The first phase scans the function looking for OMP statements
|
||||
and then for variables that must be replaced to satisfy data sharing
|
||||
@ -8869,6 +8868,110 @@ expand_omp_atomic (struct omp_region *region)
|
||||
}
|
||||
|
||||
|
||||
/* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK
|
||||
macro in gomp-constants.h. We do not check for overflow. */
|
||||
|
||||
static tree
|
||||
oacc_launch_pack (unsigned code, tree device, unsigned op)
|
||||
{
|
||||
tree res;
|
||||
|
||||
res = build_int_cst (unsigned_type_node, GOMP_LAUNCH_PACK (code, 0, op));
|
||||
if (device)
|
||||
{
|
||||
device = fold_build2 (LSHIFT_EXPR, unsigned_type_node,
|
||||
device, build_int_cst (unsigned_type_node,
|
||||
GOMP_LAUNCH_DEVICE_SHIFT));
|
||||
res = fold_build2 (BIT_IOR_EXPR, unsigned_type_node, res, device);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Look for compute grid dimension clauses and convert to an attribute
|
||||
attached to FN. This permits the target-side code to (a) massage
|
||||
the dimensions, (b) emit that data and (c) optimize. Non-constant
|
||||
dimensions are pushed onto ARGS.
|
||||
|
||||
The attribute value is a TREE_LIST. A set of dimensions is
|
||||
represented as a list of INTEGER_CST. Those that are runtime
|
||||
expressions are represented as an INTEGER_CST of zero.
|
||||
|
||||
TODO. Normally the attribute will just contain a single such list. If
|
||||
however it contains a list of lists, this will represent the use of
|
||||
device_type. Each member of the outer list is an assoc list of
|
||||
dimensions, keyed by the device type. The first entry will be the
|
||||
default. Well, that's the plan. */
|
||||
|
||||
#define OACC_FN_ATTRIB "oacc function"
|
||||
|
||||
/* Replace any existing oacc fn attribute with updated dimensions. */
|
||||
|
||||
void
|
||||
replace_oacc_fn_attrib (tree fn, tree dims)
|
||||
{
|
||||
tree ident = get_identifier (OACC_FN_ATTRIB);
|
||||
tree attribs = DECL_ATTRIBUTES (fn);
|
||||
|
||||
/* If we happen to be present as the first attrib, drop it. */
|
||||
if (attribs && TREE_PURPOSE (attribs) == ident)
|
||||
attribs = TREE_CHAIN (attribs);
|
||||
DECL_ATTRIBUTES (fn) = tree_cons (ident, dims, attribs);
|
||||
}
|
||||
|
||||
/* Scan CLAUSES for launch dimensions and attach them to the oacc
|
||||
function attribute. Push any that are non-constant onto the ARGS
|
||||
list, along with an appropriate GOMP_LAUNCH_DIM tag. */
|
||||
|
||||
static void
|
||||
set_oacc_fn_attrib (tree fn, tree clauses, vec<tree> *args)
|
||||
{
|
||||
/* Must match GOMP_DIM ordering. */
|
||||
static const omp_clause_code ids[]
|
||||
= { OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS,
|
||||
OMP_CLAUSE_VECTOR_LENGTH };
|
||||
unsigned ix;
|
||||
tree dims[GOMP_DIM_MAX];
|
||||
tree attr = NULL_TREE;
|
||||
unsigned non_const = 0;
|
||||
|
||||
for (ix = GOMP_DIM_MAX; ix--;)
|
||||
{
|
||||
tree clause = find_omp_clause (clauses, ids[ix]);
|
||||
tree dim = NULL_TREE;
|
||||
|
||||
if (clause)
|
||||
dim = OMP_CLAUSE_EXPR (clause, ids[ix]);
|
||||
dims[ix] = dim;
|
||||
if (dim && TREE_CODE (dim) != INTEGER_CST)
|
||||
{
|
||||
dim = integer_zero_node;
|
||||
non_const |= GOMP_DIM_MASK (ix);
|
||||
}
|
||||
attr = tree_cons (NULL_TREE, dim, attr);
|
||||
}
|
||||
|
||||
replace_oacc_fn_attrib (fn, attr);
|
||||
|
||||
if (non_const)
|
||||
{
|
||||
/* Push a dynamic argument set. */
|
||||
args->safe_push (oacc_launch_pack (GOMP_LAUNCH_DIM,
|
||||
NULL_TREE, non_const));
|
||||
for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
|
||||
if (non_const & GOMP_DIM_MASK (ix))
|
||||
args->safe_push (dims[ix]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Retrieve the oacc function attrib and return it. Non-oacc
|
||||
functions will return NULL. */
|
||||
|
||||
tree
|
||||
get_oacc_fn_attrib (tree fn)
|
||||
{
|
||||
return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
|
||||
}
|
||||
|
||||
/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
|
||||
|
||||
static void
|
||||
@ -8889,10 +8992,10 @@ expand_omp_target (struct omp_region *region)
|
||||
offloaded = is_gimple_omp_offloaded (entry_stmt);
|
||||
switch (gimple_omp_target_kind (entry_stmt))
|
||||
{
|
||||
case GF_OMP_TARGET_KIND_REGION:
|
||||
case GF_OMP_TARGET_KIND_UPDATE:
|
||||
case GF_OMP_TARGET_KIND_OACC_PARALLEL:
|
||||
case GF_OMP_TARGET_KIND_OACC_KERNELS:
|
||||
case GF_OMP_TARGET_KIND_REGION:
|
||||
case GF_OMP_TARGET_KIND_UPDATE:
|
||||
case GF_OMP_TARGET_KIND_OACC_UPDATE:
|
||||
case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
|
||||
data_region = false;
|
||||
@ -9224,6 +9327,7 @@ expand_omp_target (struct omp_region *region)
|
||||
}
|
||||
|
||||
gimple *g;
|
||||
bool tagging = false;
|
||||
/* The maximum number used by any start_ix, without varargs. */
|
||||
auto_vec<tree, 11> args;
|
||||
args.quick_push (device);
|
||||
@ -9259,88 +9363,87 @@ expand_omp_target (struct omp_region *region)
|
||||
break;
|
||||
case BUILT_IN_GOACC_PARALLEL:
|
||||
{
|
||||
tree t_num_gangs, t_num_workers, t_vector_length;
|
||||
|
||||
/* Default values for num_gangs, num_workers, and vector_length. */
|
||||
t_num_gangs = t_num_workers = t_vector_length
|
||||
= fold_convert_loc (gimple_location (entry_stmt),
|
||||
integer_type_node, integer_one_node);
|
||||
/* ..., but if present, use the value specified by the respective
|
||||
clause, making sure that are of the correct type. */
|
||||
c = find_omp_clause (clauses, OMP_CLAUSE_NUM_GANGS);
|
||||
if (c)
|
||||
t_num_gangs = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
|
||||
integer_type_node,
|
||||
OMP_CLAUSE_NUM_GANGS_EXPR (c));
|
||||
c = find_omp_clause (clauses, OMP_CLAUSE_NUM_WORKERS);
|
||||
if (c)
|
||||
t_num_workers = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
|
||||
integer_type_node,
|
||||
OMP_CLAUSE_NUM_WORKERS_EXPR (c));
|
||||
c = find_omp_clause (clauses, OMP_CLAUSE_VECTOR_LENGTH);
|
||||
if (c)
|
||||
t_vector_length = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
|
||||
integer_type_node,
|
||||
OMP_CLAUSE_VECTOR_LENGTH_EXPR (c));
|
||||
args.quick_push (t_num_gangs);
|
||||
args.quick_push (t_num_workers);
|
||||
args.quick_push (t_vector_length);
|
||||
set_oacc_fn_attrib (child_fn, clauses, &args);
|
||||
tagging = true;
|
||||
}
|
||||
/* FALLTHRU */
|
||||
case BUILT_IN_GOACC_ENTER_EXIT_DATA:
|
||||
case BUILT_IN_GOACC_UPDATE:
|
||||
{
|
||||
tree t_async;
|
||||
int t_wait_idx;
|
||||
tree t_async = NULL_TREE;
|
||||
|
||||
/* Default values for t_async. */
|
||||
t_async = fold_convert_loc (gimple_location (entry_stmt),
|
||||
integer_type_node,
|
||||
build_int_cst (integer_type_node,
|
||||
GOMP_ASYNC_SYNC));
|
||||
/* ..., but if present, use the value specified by the respective
|
||||
/* If present, use the value specified by the respective
|
||||
clause, making sure that it is of the correct type. */
|
||||
c = find_omp_clause (clauses, OMP_CLAUSE_ASYNC);
|
||||
if (c)
|
||||
t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
|
||||
integer_type_node,
|
||||
OMP_CLAUSE_ASYNC_EXPR (c));
|
||||
|
||||
args.quick_push (t_async);
|
||||
/* Save the index, and... */
|
||||
t_wait_idx = args.length ();
|
||||
/* ... push a default value. */
|
||||
args.quick_push (fold_convert_loc (gimple_location (entry_stmt),
|
||||
integer_type_node,
|
||||
integer_zero_node));
|
||||
c = find_omp_clause (clauses, OMP_CLAUSE_WAIT);
|
||||
if (c)
|
||||
else if (!tagging)
|
||||
/* Default values for t_async. */
|
||||
t_async = fold_convert_loc (gimple_location (entry_stmt),
|
||||
integer_type_node,
|
||||
build_int_cst (integer_type_node,
|
||||
GOMP_ASYNC_SYNC));
|
||||
if (tagging && t_async)
|
||||
{
|
||||
int n = 0;
|
||||
unsigned HOST_WIDE_INT i_async;
|
||||
|
||||
for (; c; c = OMP_CLAUSE_CHAIN (c))
|
||||
if (TREE_CODE (t_async) == INTEGER_CST)
|
||||
{
|
||||
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
|
||||
{
|
||||
args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
|
||||
integer_type_node,
|
||||
OMP_CLAUSE_WAIT_EXPR (c)));
|
||||
n++;
|
||||
}
|
||||
}
|
||||
/* See if we can pack the async arg in to the tag's
|
||||
operand. */
|
||||
i_async = TREE_INT_CST_LOW (t_async);
|
||||
|
||||
/* Now that we know the number, replace the default value. */
|
||||
args.ordered_remove (t_wait_idx);
|
||||
args.quick_insert (t_wait_idx,
|
||||
fold_convert_loc (gimple_location (entry_stmt),
|
||||
integer_type_node,
|
||||
build_int_cst (integer_type_node, n)));
|
||||
if (i_async < GOMP_LAUNCH_OP_MAX)
|
||||
t_async = NULL_TREE;
|
||||
}
|
||||
if (t_async)
|
||||
i_async = GOMP_LAUNCH_OP_MAX;
|
||||
args.safe_push (oacc_launch_pack
|
||||
(GOMP_LAUNCH_ASYNC, NULL_TREE, i_async));
|
||||
}
|
||||
if (t_async)
|
||||
args.safe_push (t_async);
|
||||
|
||||
/* Save the argument index, and ... */
|
||||
unsigned t_wait_idx = args.length ();
|
||||
unsigned num_waits = 0;
|
||||
c = find_omp_clause (clauses, OMP_CLAUSE_WAIT);
|
||||
if (!tagging || c)
|
||||
/* ... push a placeholder. */
|
||||
args.safe_push (integer_zero_node);
|
||||
|
||||
for (; c; c = OMP_CLAUSE_CHAIN (c))
|
||||
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
|
||||
{
|
||||
args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
|
||||
integer_type_node,
|
||||
OMP_CLAUSE_WAIT_EXPR (c)));
|
||||
num_waits++;
|
||||
}
|
||||
|
||||
if (!tagging || num_waits)
|
||||
{
|
||||
tree len;
|
||||
|
||||
/* Now that we know the number, update the placeholder. */
|
||||
if (tagging)
|
||||
len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
|
||||
else
|
||||
len = build_int_cst (integer_type_node, num_waits);
|
||||
len = fold_convert_loc (gimple_location (entry_stmt),
|
||||
unsigned_type_node, len);
|
||||
args[t_wait_idx] = len;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
if (tagging)
|
||||
/* Push terminal marker - zero. */
|
||||
args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
|
||||
|
||||
g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
|
||||
gimple_set_location (g, gimple_location (entry_stmt));
|
||||
|
@ -29,6 +29,7 @@ extern tree omp_reduction_init_op (location_t, enum tree_code, tree);
|
||||
extern tree omp_reduction_init (tree, tree);
|
||||
extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
|
||||
extern void omp_finish_file (void);
|
||||
extern tree get_oacc_fn_attrib (tree);
|
||||
|
||||
extern GTY(()) vec<tree, va_gc> *offload_funcs;
|
||||
extern GTY(()) vec<tree, va_gc> *offload_vars;
|
||||
|
@ -1,3 +1,4 @@
|
||||
|
||||
/* Definitions for the ubiquitous 'tree' type for GNU compilers.
|
||||
Copyright (C) 1989-2015 Free Software Foundation, Inc.
|
||||
|
||||
@ -1369,6 +1370,8 @@ extern void protected_set_expr_location (tree, location_t);
|
||||
OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_SCHEDULE), 0)
|
||||
|
||||
/* OpenACC clause expressions */
|
||||
#define OMP_CLAUSE_EXPR(NODE, CLAUSE) \
|
||||
OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, CLAUSE), 0)
|
||||
#define OMP_CLAUSE_GANG_EXPR(NODE) \
|
||||
OMP_CLAUSE_OPERAND ( \
|
||||
OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_GANG), 0)
|
||||
|
@ -1,3 +1,15 @@
|
||||
2015-09-28 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
|
||||
(GOMP_DIM_GANG, GOMP_DIM_WORKER, GOMP_DIM_VECTOR, GOMP_DIM_MAX,
|
||||
GOMP_DIM_MASK): New.
|
||||
(GOMP_LAUNCH_DIM, GOMP_LAUNCH_ASYNC, GOMP_LAUNCH_WAIT): New.
|
||||
(GOMP_LAUNCH_CODE_SHIFT, GOMP_LAUNCH_DEVICE_SHIFT,
|
||||
GOMP_LAUNCH_OP_SHIFT): New.
|
||||
(GOMP_LAUNCH_PACK, GOMP_LAUNCH_CODE, GOMP_LAUNCH_DEVICE,
|
||||
GOMP_LAUNCH_OP): New.
|
||||
(GOMP_LAUNCH_OP_MAX): New.
|
||||
|
||||
2015-08-24 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* gomp-constants.h (GOMP_VERSION, GOMP_VERSION_NVIDIA_PTX,
|
||||
|
@ -115,11 +115,33 @@ enum gomp_map_kind
|
||||
|
||||
/* Versions of libgomp and device-specific plugins. */
|
||||
#define GOMP_VERSION 0
|
||||
#define GOMP_VERSION_NVIDIA_PTX 0
|
||||
#define GOMP_VERSION_NVIDIA_PTX 1
|
||||
#define GOMP_VERSION_INTEL_MIC 0
|
||||
|
||||
#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV))
|
||||
#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff)
|
||||
#define GOMP_VERSION_DEV(PACK) ((PACK) & 0xffff)
|
||||
|
||||
#define GOMP_DIM_GANG 0
|
||||
#define GOMP_DIM_WORKER 1
|
||||
#define GOMP_DIM_VECTOR 2
|
||||
#define GOMP_DIM_MAX 3
|
||||
#define GOMP_DIM_MASK(X) (1u << (X))
|
||||
|
||||
/* Variadic launch arguments.  End of list is marked by a zero.  */
|
||||
#define GOMP_LAUNCH_DIM 1 /* Launch dimensions, op = mask */
|
||||
#define GOMP_LAUNCH_ASYNC 2 /* Async, op = cst val if not MAX */
|
||||
#define GOMP_LAUNCH_WAIT 3 /* Waits, op = num waits. */
|
||||
#define GOMP_LAUNCH_CODE_SHIFT 28
|
||||
#define GOMP_LAUNCH_DEVICE_SHIFT 16
|
||||
#define GOMP_LAUNCH_OP_SHIFT 0
|
||||
#define GOMP_LAUNCH_PACK(CODE,DEVICE,OP) \
|
||||
(((CODE) << GOMP_LAUNCH_CODE_SHIFT) \
|
||||
| ((DEVICE) << GOMP_LAUNCH_DEVICE_SHIFT) \
|
||||
| ((OP) << GOMP_LAUNCH_OP_SHIFT))
|
||||
#define GOMP_LAUNCH_CODE(X) (((X) >> GOMP_LAUNCH_CODE_SHIFT) & 0xf)
|
||||
#define GOMP_LAUNCH_DEVICE(X) (((X) >> GOMP_LAUNCH_DEVICE_SHIFT) & 0xfff)
|
||||
#define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff)
|
||||
#define GOMP_LAUNCH_OP_MAX 0xffff
|
||||
|
||||
#endif
|
||||
|
@ -1,3 +1,27 @@
|
||||
2015-09-28 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* libgomp.h (acc_dispatch_t): Replace separate geometry args with
|
||||
array.
|
||||
* libgomp.map (GOACC_parallel_keyed): New.
|
||||
* oacc-parallel.c (goacc_wait): Take pointer to va_list. Adjust
|
||||
all callers.
|
||||
(GOACC_parallel_keyed): New interface. Lose geometry arguments
|
||||
and take keyed varargs list. Adjust call to exec_func.
|
||||
(GOACC_parallel): Force host fallback.
|
||||
* libgomp_g.h (GOACC_parallel): Remove.
|
||||
(GOACC_parallel_keyed): Declare.
|
||||
* plugin/plugin-nvptx.c (struct targ_fn_launch): New struct.
|
||||
(struct targ_fn_descriptor): Replace name field with launch field.
|
||||
(nvptx_exec): Lose separate geometry args, take array. Process
|
||||
dynamic dimensions and adjust.
|
||||
(struct nvptx_tdata): Replace fn_names field with fn_descs.
|
||||
(GOMP_OFFLOAD_load_image): Adjust for change in function table
|
||||
data.
|
||||
(GOMP_OFFLOAD_openacc_parallel): Adjust for change in dimension
|
||||
passing.
|
||||
* oacc-host.c (host_openacc_exec): Adjust for change in dimension
|
||||
passing.
|
||||
|
||||
2015-09-22 Chung-Lin Tang <cltang@codesourcery.com>
|
||||
|
||||
PR libgomp/67141
|
||||
|
@ -695,7 +695,7 @@ typedef struct acc_dispatch_t
|
||||
|
||||
/* Execute. */
|
||||
void (*exec_func) (void (*) (void *), size_t, void **, void **, size_t *,
|
||||
unsigned short *, int, int, int, int, void *);
|
||||
unsigned short *, int, unsigned *, void *);
|
||||
|
||||
/* Async cleanup callback registration. */
|
||||
void (*register_async_cleanup_func) (void *);
|
||||
|
@ -332,6 +332,11 @@ GOACC_2.0 {
|
||||
GOACC_get_num_threads;
|
||||
};
|
||||
|
||||
GOACC_2.0.1 {
|
||||
global:
|
||||
GOACC_parallel_keyed;
|
||||
} GOACC_2.0;
|
||||
|
||||
GOMP_PLUGIN_1.0 {
|
||||
global:
|
||||
GOMP_PLUGIN_malloc;
|
||||
|
@ -222,9 +222,8 @@ extern void GOACC_data_start (int, size_t, void **, size_t *,
|
||||
extern void GOACC_data_end (void);
|
||||
extern void GOACC_enter_exit_data (int, size_t, void **,
|
||||
size_t *, unsigned short *, int, int, ...);
|
||||
extern void GOACC_parallel (int, void (*) (void *), size_t,
|
||||
void **, size_t *, unsigned short *, int, int, int,
|
||||
int, int, ...);
|
||||
/* Keyed-varargs OpenACC launch entry point (defined in oacc-parallel.c).
   The trailing variadic arguments are keyed optional parameters
   terminated with a zero.  The name must match the definition and the
   symbol exported in libgomp.map.  */
extern void GOACC_parallel_keyed (int, void (*) (void *), size_t,
				  void **, size_t *, unsigned short *, ...);
|
||||
extern void GOACC_update (int, size_t, void **, size_t *,
|
||||
unsigned short *, int, int, ...);
|
||||
extern void GOACC_wait (int, int, ...);
|
||||
|
@ -137,10 +137,8 @@ host_openacc_exec (void (*fn) (void *),
|
||||
void **devaddrs __attribute__ ((unused)),
|
||||
size_t *sizes __attribute__ ((unused)),
|
||||
unsigned short *kinds __attribute__ ((unused)),
|
||||
int num_gangs __attribute__ ((unused)),
|
||||
int num_workers __attribute__ ((unused)),
|
||||
int vector_length __attribute__ ((unused)),
|
||||
int async __attribute__ ((unused)),
|
||||
unsigned *dims __attribute ((unused)),
|
||||
void *targ_mem_desc __attribute__ ((unused)))
|
||||
{
|
||||
fn (hostaddrs);
|
||||
|
@ -49,14 +49,18 @@ find_pset (int pos, size_t mapnum, unsigned short *kinds)
|
||||
return kind == GOMP_MAP_TO_PSET;
|
||||
}
|
||||
|
||||
static void goacc_wait (int async, int num_waits, va_list ap);
|
||||
static void goacc_wait (int async, int num_waits, va_list *ap);
|
||||
|
||||
|
||||
/* Launch a possibly offloaded function on DEVICE. FN is the host fn
|
||||
address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
|
||||
blocks to be copied to/from the device. Variadic arguments are
|
||||
keyed optional parameters terminated with a zero. */
|
||||
|
||||
void
|
||||
GOACC_parallel (int device, void (*fn) (void *),
|
||||
size_t mapnum, void **hostaddrs, size_t *sizes,
|
||||
unsigned short *kinds,
|
||||
int num_gangs, int num_workers, int vector_length,
|
||||
int async, int num_waits, ...)
|
||||
GOACC_parallel_keyed (int device, void (*fn) (void *),
|
||||
size_t mapnum, void **hostaddrs, size_t *sizes,
|
||||
unsigned short *kinds, ...)
|
||||
{
|
||||
bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
|
||||
va_list ap;
|
||||
@ -68,22 +72,16 @@ GOACC_parallel (int device, void (*fn) (void *),
|
||||
struct splay_tree_key_s k;
|
||||
splay_tree_key tgt_fn_key;
|
||||
void (*tgt_fn);
|
||||
|
||||
if (num_gangs != 1)
|
||||
gomp_fatal ("num_gangs (%d) different from one is not yet supported",
|
||||
num_gangs);
|
||||
if (num_workers != 1)
|
||||
gomp_fatal ("num_workers (%d) different from one is not yet supported",
|
||||
num_workers);
|
||||
int async = GOMP_ASYNC_SYNC;
|
||||
unsigned dims[GOMP_DIM_MAX];
|
||||
unsigned tag;
|
||||
|
||||
#ifdef HAVE_INTTYPES_H
|
||||
gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p, "
|
||||
"async = %d\n",
|
||||
__FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds, async);
|
||||
gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
|
||||
__FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
|
||||
#else
|
||||
gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
|
||||
__FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds,
|
||||
async);
|
||||
gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
|
||||
__FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
|
||||
#endif
|
||||
goacc_lazy_initialize ();
|
||||
|
||||
@ -105,12 +103,51 @@ GOACC_parallel (int device, void (*fn) (void *),
|
||||
return;
|
||||
}
|
||||
|
||||
if (num_waits)
|
||||
va_start (ap, kinds);
|
||||
/* TODO: This will need amending when device_type is implemented. */
|
||||
while ((tag = va_arg (ap, unsigned)) != 0)
|
||||
{
|
||||
va_start (ap, num_waits);
|
||||
goacc_wait (async, num_waits, ap);
|
||||
va_end (ap);
|
||||
if (GOMP_LAUNCH_DEVICE (tag))
|
||||
gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
|
||||
GOMP_LAUNCH_DEVICE (tag));
|
||||
|
||||
switch (GOMP_LAUNCH_CODE (tag))
|
||||
{
|
||||
case GOMP_LAUNCH_DIM:
|
||||
{
|
||||
unsigned mask = GOMP_LAUNCH_OP (tag);
|
||||
|
||||
for (i = 0; i != GOMP_DIM_MAX; i++)
|
||||
if (mask & GOMP_DIM_MASK (i))
|
||||
dims[i] = va_arg (ap, unsigned);
|
||||
}
|
||||
break;
|
||||
|
||||
case GOMP_LAUNCH_ASYNC:
|
||||
{
|
||||
/* Small constant values are encoded in the operand. */
|
||||
async = GOMP_LAUNCH_OP (tag);
|
||||
|
||||
if (async == GOMP_LAUNCH_OP_MAX)
|
||||
async = va_arg (ap, unsigned);
|
||||
break;
|
||||
}
|
||||
|
||||
case GOMP_LAUNCH_WAIT:
|
||||
{
|
||||
unsigned num_waits = GOMP_LAUNCH_OP (tag);
|
||||
|
||||
if (num_waits)
|
||||
goacc_wait (async, num_waits, &ap);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
gomp_fatal ("unrecognized offload code '%d',"
|
||||
" libgomp is too old", GOMP_LAUNCH_CODE (tag));
|
||||
}
|
||||
}
|
||||
va_end (ap);
|
||||
|
||||
acc_dev->openacc.async_set_async_func (async);
|
||||
|
||||
@ -138,9 +175,8 @@ GOACC_parallel (int device, void (*fn) (void *),
|
||||
devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
|
||||
+ tgt->list[i]->tgt_offset);
|
||||
|
||||
acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
|
||||
num_gangs, num_workers, vector_length, async,
|
||||
tgt);
|
||||
acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes,
|
||||
kinds, async, dims, tgt);
|
||||
|
||||
/* If running synchronously, unmap immediately. */
|
||||
if (async < acc_async_noval)
|
||||
@ -154,6 +190,20 @@ GOACC_parallel (int device, void (*fn) (void *),
|
||||
acc_dev->openacc.async_set_async_func (acc_async_sync);
|
||||
}
|
||||
|
||||
/* Legacy entry point, only provide host execution. */
|
||||
|
||||
void
|
||||
GOACC_parallel (int device, void (*fn) (void *),
|
||||
size_t mapnum, void **hostaddrs, size_t *sizes,
|
||||
unsigned short *kinds,
|
||||
int num_gangs, int num_workers, int vector_length,
|
||||
int async, int num_waits, ...)
|
||||
{
|
||||
goacc_save_and_set_bind (acc_device_host);
|
||||
fn (hostaddrs);
|
||||
goacc_restore_bind ();
|
||||
}
|
||||
|
||||
void
|
||||
GOACC_data_start (int device, size_t mapnum,
|
||||
void **hostaddrs, size_t *sizes, unsigned short *kinds)
|
||||
@ -230,7 +280,7 @@ GOACC_enter_exit_data (int device, size_t mapnum,
|
||||
va_list ap;
|
||||
|
||||
va_start (ap, num_waits);
|
||||
goacc_wait (async, num_waits, ap);
|
||||
goacc_wait (async, num_waits, &ap);
|
||||
va_end (ap);
|
||||
}
|
||||
|
||||
@ -344,15 +394,15 @@ GOACC_enter_exit_data (int device, size_t mapnum,
|
||||
}
|
||||
|
||||
static void
|
||||
goacc_wait (int async, int num_waits, va_list ap)
|
||||
goacc_wait (int async, int num_waits, va_list *ap)
|
||||
{
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
struct gomp_device_descr *acc_dev = thr->dev;
|
||||
|
||||
while (num_waits--)
|
||||
{
|
||||
int qid = va_arg (ap, int);
|
||||
|
||||
int qid = va_arg (*ap, int);
|
||||
|
||||
if (acc_async_test (qid))
|
||||
continue;
|
||||
|
||||
@ -389,7 +439,7 @@ GOACC_update (int device, size_t mapnum,
|
||||
va_list ap;
|
||||
|
||||
va_start (ap, num_waits);
|
||||
goacc_wait (async, num_waits, ap);
|
||||
goacc_wait (async, num_waits, &ap);
|
||||
va_end (ap);
|
||||
}
|
||||
|
||||
@ -430,7 +480,7 @@ GOACC_wait (int async, int num_waits, ...)
|
||||
va_list ap;
|
||||
|
||||
va_start (ap, num_waits);
|
||||
goacc_wait (async, num_waits, ap);
|
||||
goacc_wait (async, num_waits, &ap);
|
||||
va_end (ap);
|
||||
}
|
||||
else if (async == acc_async_sync)
|
||||
|
@ -282,12 +282,20 @@ map_push (struct ptx_stream *s, int async, size_t size, void **h, void **d)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Target data function launch information. */
|
||||
|
||||
struct targ_fn_launch
|
||||
{
|
||||
const char *fn;
|
||||
unsigned short dim[3];
|
||||
};
|
||||
|
||||
/* Descriptor of a loaded function. */
|
||||
|
||||
struct targ_fn_descriptor
|
||||
{
|
||||
CUfunction fn;
|
||||
const char *name;
|
||||
const struct targ_fn_launch *launch;
|
||||
};
|
||||
|
||||
/* A loaded PTX image. */
|
||||
@ -929,8 +937,8 @@ event_add (enum ptx_event_type type, CUevent *e, void *h)
|
||||
|
||||
void
|
||||
nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
|
||||
size_t *sizes, unsigned short *kinds, int num_gangs, int num_workers,
|
||||
int vector_length, int async, void *targ_mem_desc)
|
||||
size_t *sizes, unsigned short *kinds, int async, unsigned *dims,
|
||||
void *targ_mem_desc)
|
||||
{
|
||||
struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn;
|
||||
CUfunction function;
|
||||
@ -939,7 +947,6 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
|
||||
struct ptx_stream *dev_str;
|
||||
void *kargs[1];
|
||||
void *hp, *dp;
|
||||
unsigned int nthreads_in_block;
|
||||
struct nvptx_thread *nvthd = nvptx_thread ();
|
||||
const char *maybe_abort_msg = "(perhaps abort was called)";
|
||||
|
||||
@ -948,6 +955,20 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
|
||||
dev_str = select_stream_for_async (async, pthread_self (), false, NULL);
|
||||
assert (dev_str == nvthd->current_stream);
|
||||
|
||||
/* Initialize the launch dimensions. Typically this is constant,
|
||||
provided by the device compiler, but we must permit runtime
|
||||
values. */
|
||||
for (i = 0; i != 3; i++)
|
||||
if (targ_fn->launch->dim[i])
|
||||
dims[i] = targ_fn->launch->dim[i];
|
||||
|
||||
if (dims[GOMP_DIM_GANG] != 1)
|
||||
GOMP_PLUGIN_fatal ("non-unity num_gangs (%d) not supported",
|
||||
dims[GOMP_DIM_GANG]);
|
||||
if (dims[GOMP_DIM_WORKER] != 1)
|
||||
GOMP_PLUGIN_fatal ("non-unity num_workers (%d) not supported",
|
||||
dims[GOMP_DIM_WORKER]);
|
||||
|
||||
/* This reserves a chunk of a pre-allocated page of memory mapped on both
|
||||
the host and the device. HP is a host pointer to the new chunk, and DP is
|
||||
the corresponding device pointer. */
|
||||
@ -965,35 +986,21 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
|
||||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("cuMemcpy failed: %s", cuda_error (r));
|
||||
|
||||
GOMP_PLUGIN_debug (0, " %s: kernel %s: launch\n", __FUNCTION__, targ_fn->name);
|
||||
GOMP_PLUGIN_debug (0, " %s: kernel %s: launch"
|
||||
" gangs=%u, workers=%u, vectors=%u\n",
|
||||
__FUNCTION__, targ_fn->launch->fn,
|
||||
dims[0], dims[1], dims[2]);
|
||||
|
||||
// OpenACC CUDA
|
||||
//
|
||||
// num_gangs blocks
|
||||
// num_workers warps (where a warp is equivalent to 32 threads)
|
||||
// vector length threads
|
||||
//
|
||||
|
||||
/* The openacc vector_length clause 'determines the vector length to use for
|
||||
vector or SIMD operations'. The question is how to map this to CUDA.
|
||||
|
||||
In CUDA, the warp size is the vector length of a CUDA device. However, the
|
||||
CUDA interface abstracts away from that, and only shows us warp size
|
||||
indirectly in maximum number of threads per block, which is a product of
|
||||
warp size and the number of hyperthreads of a multiprocessor.
|
||||
|
||||
We choose to map openacc vector_length directly onto the number of threads
|
||||
in a block, in the x dimension. This is reflected in gcc code generation
|
||||
that uses ThreadIdx.x to access vector elements.
|
||||
|
||||
Attempting to use an openacc vector_length of more than the maximum number
|
||||
of threads per block will result in a cuda error. */
|
||||
nthreads_in_block = vector_length;
|
||||
// num_gangs nctaid.x
|
||||
// num_workers ntid.y
|
||||
// vector length ntid.x
|
||||
|
||||
kargs[0] = &dp;
|
||||
r = cuLaunchKernel (function,
|
||||
num_gangs, 1, 1,
|
||||
nthreads_in_block, 1, 1,
|
||||
dims[GOMP_DIM_GANG], 1, 1,
|
||||
dims[GOMP_DIM_VECTOR], dims[GOMP_DIM_WORKER], 1,
|
||||
0, dev_str->stream, kargs, 0);
|
||||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("cuLaunchKernel error: %s", cuda_error (r));
|
||||
@ -1039,7 +1046,7 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
|
||||
#endif
|
||||
|
||||
GOMP_PLUGIN_debug (0, " %s: kernel %s: finished\n", __FUNCTION__,
|
||||
targ_fn->name);
|
||||
targ_fn->launch->fn);
|
||||
|
||||
#ifndef DISABLE_ASYNC
|
||||
if (async < acc_async_noval)
|
||||
@ -1567,7 +1574,7 @@ typedef struct nvptx_tdata
|
||||
const char *const *var_names;
|
||||
size_t var_num;
|
||||
|
||||
const char *const *fn_names;
|
||||
const struct targ_fn_launch *fn_descs;
|
||||
size_t fn_num;
|
||||
} nvptx_tdata_t;
|
||||
|
||||
@ -1588,7 +1595,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
|
||||
struct addr_pair **target_table)
|
||||
{
|
||||
CUmodule module;
|
||||
const char *const *fn_names, *const *var_names;
|
||||
const char *const *var_names;
|
||||
const struct targ_fn_launch *fn_descs;
|
||||
unsigned int fn_entries, var_entries, i, j;
|
||||
CUresult r;
|
||||
struct targ_fn_descriptor *targ_fns;
|
||||
@ -1617,7 +1625,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
|
||||
var_entries = img_header->var_num;
|
||||
var_names = img_header->var_names;
|
||||
fn_entries = img_header->fn_num;
|
||||
fn_names = img_header->fn_names;
|
||||
fn_descs = img_header->fn_descs;
|
||||
|
||||
targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
|
||||
* (fn_entries + var_entries));
|
||||
@ -1640,12 +1648,12 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
|
||||
{
|
||||
CUfunction function;
|
||||
|
||||
r = cuModuleGetFunction (&function, module, fn_names[i]);
|
||||
r = cuModuleGetFunction (&function, module, fn_descs[i].fn);
|
||||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("cuModuleGetFunction error: %s", cuda_error (r));
|
||||
|
||||
targ_fns->fn = function;
|
||||
targ_fns->name = (const char *) fn_names[i];
|
||||
targ_fns->launch = &fn_descs[i];
|
||||
|
||||
targ_tbl->start = (uintptr_t) targ_fns;
|
||||
targ_tbl->end = targ_tbl->start + 1;
|
||||
@ -1724,13 +1732,12 @@ void (*device_run) (int n, void *fn_ptr, void *vars) = NULL;
|
||||
|
||||
void
|
||||
GOMP_OFFLOAD_openacc_parallel (void (*fn) (void *), size_t mapnum,
|
||||
void **hostaddrs, void **devaddrs, size_t *sizes,
|
||||
unsigned short *kinds, int num_gangs,
|
||||
int num_workers, int vector_length, int async,
|
||||
void *targ_mem_desc)
|
||||
void **hostaddrs, void **devaddrs,
|
||||
size_t *sizes, unsigned short *kinds,
|
||||
int async, unsigned *dims, void *targ_mem_desc)
|
||||
{
|
||||
nvptx_exec (fn, mapnum, hostaddrs, devaddrs, sizes, kinds, num_gangs,
|
||||
num_workers, vector_length, async, targ_mem_desc);
|
||||
nvptx_exec (fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
|
||||
async, dims, targ_mem_desc);
|
||||
}
|
||||
|
||||
void
|
||||
|
Loading…
Reference in New Issue
Block a user