b2b4005150
2016-01-19 Martin Jambor <mjambor@suse.cz> Martin Liska <mliska@suse.cz> Michael Matz <matz@suse.de> libgomp/ * plugin/Makefrag.am: Add HSA plugin requirements. * plugin/configfrag.ac (HSA_RUNTIME_INCLUDE): New variable. (HSA_RUNTIME_LIB): Likewise. (HSA_RUNTIME_CPPFLAGS): Likewise. (HSA_RUNTIME_INCLUDE): New substitution. (HSA_RUNTIME_LIB): Likewise. (HSA_RUNTIME_LDFLAGS): Likewise. (hsa-runtime): New configure option. (hsa-runtime-include): Likewise. (hsa-runtime-lib): Likewise. (PLUGIN_HSA): New substitution variable. Fill HSA_RUNTIME_INCLUDE and HSA_RUNTIME_LIB according to the new configure options. (PLUGIN_HSA_CPPFLAGS): Likewise. (PLUGIN_HSA_LDFLAGS): Likewise. (PLUGIN_HSA_LIBS): Likewise. Check that we have access to HSA run-time. * libgomp-plugin.h (offload_target_type): New element OFFLOAD_TARGET_TYPE_HSA. * libgomp.h (gomp_target_task): New fields firstprivate_copies and args. (bool gomp_create_target_task): Updated. (gomp_device_descr): Extra parameter of run_func and async_run_func, new field can_run_func. * libgomp_g.h (GOMP_target_ext): Update prototype. * oacc-host.c (host_run): Added a new parameter args. * target.c (calculate_firstprivate_requirements): New function. (copy_firstprivate_data): Likewise. (gomp_target_fallback_firstprivate): Use them. (gomp_target_unshare_firstprivate): New function. (gomp_get_target_fn_addr): Allow returning NULL for shared memory devices. (GOMP_target): Do host fallback for all shared memory devices. Do not pass any args to plugins. (GOMP_target_ext): Introduce device-specific argument parameter args. Allow host fallback if device shares memory. Do not remap data if device has shared memory. (gomp_target_task_fn): Likewise. Also treat shared memory devices like host fallback for mappings. (GOMP_target_data): Treat shared memory devices like host fallback. (GOMP_target_data_ext): Likewise. (GOMP_target_update): Likewise. (GOMP_target_update_ext): Likewise. Also pass NULL as args to gomp_create_target_task. 
(GOMP_target_enter_exit_data): Likewise. (omp_target_alloc): Treat shared memory devices like host fallback. (omp_target_free): Likewise. (omp_target_is_present): Likewise. (omp_target_memcpy): Likewise. (omp_target_memcpy_rect): Likewise. (omp_target_associate_ptr): Likewise. (gomp_load_plugin_for_device): Also load can_run. * task.c (GOMP_PLUGIN_target_task_completion): Free firstprivate_copies. (gomp_create_target_task): Accept new argument args and store it to ttask. * plugin/plugin-hsa.c: New file. gcc/ * Makefile.in (OBJS): Add new source files. (GTFILES): Add hsa.c. * common.opt (disable_hsa): New variable. (-Whsa): New warning. * config.in (ENABLE_HSA): New. * configure.ac: Treat hsa differently from other accelerators. (OFFLOAD_TARGETS): Define ENABLE_OFFLOADING according to $enable_offloading. (ENABLE_HSA): Define ENABLE_HSA according to $enable_hsa. * doc/install.texi (Configuration): Document --with-hsa-runtime, --with-hsa-runtime-include, --with-hsa-runtime-lib and --with-hsa-kmt-lib. * doc/invoke.texi (-Whsa): Document. (hsa-gen-debug-stores): Likewise. * lto-wrapper.c (compile_images_for_offload_targets): Do not attempt to invoke offload compiler for hsa acclerator. * opts.c (common_handle_option): Determine whether HSA offloading should be performed. * params.def (PARAM_HSA_GEN_DEBUG_STORES): New parameter. * builtin-types.def (BT_FN_VOID_UINT_PTR_INT_PTR): New. (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT): Removed. (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR): New. * gimple-low.c (lower_stmt): Also handle GIMPLE_OMP_GRID_BODY. * gimple-pretty-print.c (dump_gimple_omp_for): Also handle GF_OMP_FOR_KIND_GRID_LOOP. (dump_gimple_omp_block): Also handle GIMPLE_OMP_GRID_BODY. (pp_gimple_stmt_1): Likewise. * gimple-walk.c (walk_gimple_stmt): Likewise. * gimple.c (gimple_build_omp_grid_body): New function. (gimple_copy): Also handle GIMPLE_OMP_GRID_BODY. * gimple.def (GIMPLE_OMP_GRID_BODY): New. 
* gimple.h (enum gf_mask): Added GF_OMP_PARALLEL_GRID_PHONY, GF_OMP_FOR_KIND_GRID_LOOP, GF_OMP_FOR_GRID_PHONY and GF_OMP_TEAMS_GRID_PHONY. (gimple_statement_omp_single_layout): Updated comments. (gimple_build_omp_grid_body): New function. (gimple_has_substatements): Also handle GIMPLE_OMP_GRID_BODY. (gimple_omp_for_grid_phony): New function. (gimple_omp_for_set_grid_phony): Likewise. (gimple_omp_parallel_grid_phony): Likewise. (gimple_omp_parallel_set_grid_phony): Likewise. (gimple_omp_teams_grid_phony): Likewise. (gimple_omp_teams_set_grid_phony): Likewise. (gimple_return_set_retbnd): Also handle GIMPLE_OMP_GRID_BODY. * omp-builtins.def (BUILT_IN_GOMP_OFFLOAD_REGISTER): New. (BUILT_IN_GOMP_OFFLOAD_UNREGISTER): Likewise. (BUILT_IN_GOMP_TARGET): Updated type. * omp-low.c: Include symbol-summary.h, hsa.h and params.h. (adjust_for_condition): New function. (get_omp_for_step_from_incr): Likewise. (extract_omp_for_data): Moved parts to adjust_for_condition and get_omp_for_step_from_incr. (build_outer_var_ref): Handle GIMPLE_OMP_GRID_BODY. (fixup_child_record_type): Bail out if receiver_decl is NULL. (scan_sharing_clauses): Handle OMP_CLAUSE__GRIDDIM_. (scan_omp_parallel): Do not create child functions for phony constructs. (check_omp_nesting_restrictions): Handle GIMPLE_OMP_GRID_BODY. (scan_omp_1_op): Checking assert we are not remapping to ERROR_MARK. Also also handle GIMPLE_OMP_GRID_BODY. (parallel_needs_hsa_kernel_p): New function. (expand_parallel_call): Register apprpriate parallel child functions as HSA kernels. (grid_launch_attributes_trees): New type. (grid_attr_trees): New variable. (grid_create_kernel_launch_attr_types): New function. (grid_insert_store_range_dim): Likewise. (grid_get_kernel_launch_attributes): Likewise. (get_target_argument_identifier_1): Likewise. (get_target_argument_identifier): Likewise. (get_target_argument_value): Likewise. (push_target_argument_according_to_value): Likewise. (get_target_arguments): Likewise. 
(expand_omp_target): Call get_target_arguments instead of looking up for teams and thread limit. (grid_expand_omp_for_loop): New function. (grid_arg_decl_map): New type. (grid_remap_kernel_arg_accesses): New function. (grid_expand_target_kernel_body): New function. (expand_omp): Call it. (lower_omp_for): Do not emit phony constructs. (lower_omp_taskreg): Do not emit phony constructs but create for them a temporary variable receiver_decl. (lower_omp_taskreg): Do not emit phony constructs. (lower_omp_teams): Likewise. (lower_omp_grid_body): New function. (lower_omp_1): Call it. (grid_reg_assignment_to_local_var_p): New function. (grid_seq_only_contains_local_assignments): Likewise. (grid_find_single_omp_among_assignments_1): Likewise. (grid_find_single_omp_among_assignments): Likewise. (grid_find_ungridifiable_statement): Likewise. (grid_target_follows_gridifiable_pattern): Likewise. (grid_remap_prebody_decls): Likewise. (grid_copy_leading_local_assignments): Likewise. (grid_process_kernel_body_copy): Likewise. (grid_attempt_target_gridification): Likewise. (grid_gridify_all_targets_stmt): Likewise. (grid_gridify_all_targets): Likewise. (execute_lower_omp): Call grid_gridify_all_targets. (make_gimple_omp_edges): Handle GIMPLE_OMP_GRID_BODY. * tree-core.h (omp_clause_code): Added OMP_CLAUSE__GRIDDIM_. (tree_omp_clause): Added union field dimension. * tree-pretty-print.c (dump_omp_clause): Handle OMP_CLAUSE__GRIDDIM_. * tree.c (omp_clause_num_ops): Added number of arguments of OMP_CLAUSE__GRIDDIM_. (omp_clause_code_name): Added name of OMP_CLAUSE__GRIDDIM_. (walk_tree_1): Handle OMP_CLAUSE__GRIDDIM_. * tree.h (OMP_CLAUSE_GRIDDIM_DIMENSION): New. (OMP_CLAUSE_SET_GRIDDIM_DIMENSION): Likewise. (OMP_CLAUSE_GRIDDIM_SIZE): Likewise. (OMP_CLAUSE_GRIDDIM_GROUP): Likewise. * passes.def: Schedule pass_ipa_hsa and pass_gen_hsail. * tree-pass.h (make_pass_gen_hsail): Declare. (make_pass_ipa_hsa): Likewise. * ipa-hsa.c: New file. 
* lto-section-in.c (lto_section_name): Add hsa section name. * lto-streamer.h (lto_section_type): Add hsa section. * timevar.def (TV_IPA_HSA): New. * hsa-brig-format.h: New file. * hsa-brig.c: New file. * hsa-dump.c: Likewise. * hsa-gen.c: Likewise. * hsa.c: Likewise. * hsa.h: Likewise. * toplev.c (compile_file): Call hsa_output_brig. * hsa-regalloc.c: New file. gcc/fortran/ * types.def (BT_FN_VOID_UINT_PTR_INT_PTR): New. (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT): Removed. (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR): New. gcc/lto/ * lto-partition.c: Include "hsa.h" (add_symbol_to_partition_1): Put hsa implementations into the same partition as host implementations. liboffloadmic/ * plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_async_run): New unused parameter. (GOMP_OFFLOAD_run): Likewise. include/ * gomp-constants.h (GOMP_DEVICE_HSA): New macro. (GOMP_VERSION_HSA): Likewise. (GOMP_TARGET_ARG_DEVICE_MASK): Likewise. (GOMP_TARGET_ARG_DEVICE_ALL): Likewise. (GOMP_TARGET_ARG_SUBSEQUENT_PARAM): Likewise. (GOMP_TARGET_ARG_ID_MASK): Likewise. (GOMP_TARGET_ARG_NUM_TEAMS): Likewise. (GOMP_TARGET_ARG_THREAD_LIMIT): Likewise. (GOMP_TARGET_ARG_VALUE_SHIFT): Likewise. (GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES): Likewise. From-SVN: r232549
332 lines
9.2 KiB
C
332 lines
9.2 KiB
C
/* Interprocedural HSA pass: creation of HSA clones of functions.
|
|
Copyright (C) 2015-2016 Free Software Foundation, Inc.
|
|
Contributed by Martin Liska <mliska@suse.cz>
|
|
|
|
This file is part of GCC.
|
|
|
|
GCC is free software; you can redistribute it and/or modify it under
|
|
the terms of the GNU General Public License as published by the Free
|
|
Software Foundation; either version 3, or (at your option) any later
|
|
version.
|
|
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with GCC; see the file COPYING3. If not see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
/* Interprocedural HSA pass is responsible for creation of HSA clones.
|
|
For all these HSA clones, we emit HSAIL instructions and pass processing
|
|
is terminated. */
|
|
|
|
#include "config.h"
|
|
#include "system.h"
|
|
#include "coretypes.h"
|
|
#include "tm.h"
|
|
#include "is-a.h"
|
|
#include "hash-set.h"
|
|
#include "vec.h"
|
|
#include "tree.h"
|
|
#include "tree-pass.h"
|
|
#include "function.h"
|
|
#include "basic-block.h"
|
|
#include "gimple.h"
|
|
#include "dumpfile.h"
|
|
#include "gimple-pretty-print.h"
|
|
#include "tree-streamer.h"
|
|
#include "stringpool.h"
|
|
#include "cgraph.h"
|
|
#include "print-tree.h"
|
|
#include "symbol-summary.h"
|
|
#include "hsa.h"
|
|
|
|
namespace {
|
|
|
|
/* If NODE is not versionable, warn about not emiting HSAIL and return false.
|
|
Otherwise return true. */
|
|
|
|
static bool
|
|
check_warn_node_versionable (cgraph_node *node)
|
|
{
|
|
if (!node->local.versionable)
|
|
{
|
|
warning_at (EXPR_LOCATION (node->decl), OPT_Whsa,
|
|
"could not emit HSAIL for function %s: function cannot be "
|
|
"cloned", node->name ());
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/* The function creates HSA clones for all functions that were either
|
|
marked as HSA kernels or are callable HSA functions. Apart from that,
|
|
we redirect all edges that come from an HSA clone and end in another
|
|
HSA clone to connect these two functions. */
|
|
|
|
static unsigned int
|
|
process_hsa_functions (void)
|
|
{
|
|
struct cgraph_node *node;
|
|
|
|
if (hsa_summaries == NULL)
|
|
hsa_summaries = new hsa_summary_t (symtab);
|
|
|
|
FOR_EACH_DEFINED_FUNCTION (node)
|
|
{
|
|
hsa_function_summary *s = hsa_summaries->get (node);
|
|
|
|
/* A linked function is skipped. */
|
|
if (s->m_binded_function != NULL)
|
|
continue;
|
|
|
|
if (s->m_kind != HSA_NONE)
|
|
{
|
|
if (!check_warn_node_versionable (node))
|
|
continue;
|
|
cgraph_node *clone
|
|
= node->create_virtual_clone (vec <cgraph_edge *> (),
|
|
NULL, NULL, "hsa");
|
|
TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl);
|
|
|
|
clone->force_output = true;
|
|
hsa_summaries->link_functions (clone, node, s->m_kind, false);
|
|
|
|
if (dump_file)
|
|
fprintf (dump_file, "Created a new HSA clone: %s, type: %s\n",
|
|
clone->name (),
|
|
s->m_kind == HSA_KERNEL ? "kernel" : "function");
|
|
}
|
|
else if (hsa_callable_function_p (node->decl))
|
|
{
|
|
if (!check_warn_node_versionable (node))
|
|
continue;
|
|
cgraph_node *clone
|
|
= node->create_virtual_clone (vec <cgraph_edge *> (),
|
|
NULL, NULL, "hsa");
|
|
TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl);
|
|
|
|
if (!cgraph_local_p (node))
|
|
clone->force_output = true;
|
|
hsa_summaries->link_functions (clone, node, HSA_FUNCTION, false);
|
|
|
|
if (dump_file)
|
|
fprintf (dump_file, "Created a new HSA function clone: %s\n",
|
|
clone->name ());
|
|
}
|
|
}
|
|
|
|
/* Redirect all edges that are between HSA clones. */
|
|
FOR_EACH_DEFINED_FUNCTION (node)
|
|
{
|
|
cgraph_edge *e = node->callees;
|
|
|
|
while (e)
|
|
{
|
|
hsa_function_summary *src = hsa_summaries->get (node);
|
|
if (src->m_kind != HSA_NONE && src->m_gpu_implementation_p)
|
|
{
|
|
hsa_function_summary *dst = hsa_summaries->get (e->callee);
|
|
if (dst->m_kind != HSA_NONE && !dst->m_gpu_implementation_p)
|
|
{
|
|
e->redirect_callee (dst->m_binded_function);
|
|
if (dump_file)
|
|
fprintf (dump_file,
|
|
"Redirecting edge to HSA function: %s->%s\n",
|
|
xstrdup_for_dump (e->caller->name ()),
|
|
xstrdup_for_dump (e->callee->name ()));
|
|
}
|
|
}
|
|
|
|
e = e->next_callee;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Iterate all HSA functions and stream out HSA function summary. */
|
|
|
|
static void
|
|
ipa_hsa_write_summary (void)
|
|
{
|
|
struct bitpack_d bp;
|
|
struct cgraph_node *node;
|
|
struct output_block *ob;
|
|
unsigned int count = 0;
|
|
lto_symtab_encoder_iterator lsei;
|
|
lto_symtab_encoder_t encoder;
|
|
|
|
if (!hsa_summaries)
|
|
return;
|
|
|
|
ob = create_output_block (LTO_section_ipa_hsa);
|
|
encoder = ob->decl_state->symtab_node_encoder;
|
|
ob->symbol = NULL;
|
|
for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
|
|
lsei_next_function_in_partition (&lsei))
|
|
{
|
|
node = lsei_cgraph_node (lsei);
|
|
hsa_function_summary *s = hsa_summaries->get (node);
|
|
|
|
if (s->m_kind != HSA_NONE)
|
|
count++;
|
|
}
|
|
|
|
streamer_write_uhwi (ob, count);
|
|
|
|
/* Process all of the functions. */
|
|
for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
|
|
lsei_next_function_in_partition (&lsei))
|
|
{
|
|
node = lsei_cgraph_node (lsei);
|
|
hsa_function_summary *s = hsa_summaries->get (node);
|
|
|
|
if (s->m_kind != HSA_NONE)
|
|
{
|
|
encoder = ob->decl_state->symtab_node_encoder;
|
|
int node_ref = lto_symtab_encoder_encode (encoder, node);
|
|
streamer_write_uhwi (ob, node_ref);
|
|
|
|
bp = bitpack_create (ob->main_stream);
|
|
bp_pack_value (&bp, s->m_kind, 2);
|
|
bp_pack_value (&bp, s->m_gpu_implementation_p, 1);
|
|
bp_pack_value (&bp, s->m_binded_function != NULL, 1);
|
|
streamer_write_bitpack (&bp);
|
|
if (s->m_binded_function)
|
|
stream_write_tree (ob, s->m_binded_function->decl, true);
|
|
}
|
|
}
|
|
|
|
streamer_write_char_stream (ob->main_stream, 0);
|
|
produce_asm (ob, NULL);
|
|
destroy_output_block (ob);
|
|
}
|
|
|
|
/* Read section in file FILE_DATA of length LEN with data DATA. */
|
|
|
|
static void
|
|
ipa_hsa_read_section (struct lto_file_decl_data *file_data, const char *data,
|
|
size_t len)
|
|
{
|
|
const struct lto_function_header *header
|
|
= (const struct lto_function_header *) data;
|
|
const int cfg_offset = sizeof (struct lto_function_header);
|
|
const int main_offset = cfg_offset + header->cfg_size;
|
|
const int string_offset = main_offset + header->main_size;
|
|
struct data_in *data_in;
|
|
unsigned int i;
|
|
unsigned int count;
|
|
|
|
lto_input_block ib_main ((const char *) data + main_offset,
|
|
header->main_size, file_data->mode_table);
|
|
|
|
data_in
|
|
= lto_data_in_create (file_data, (const char *) data + string_offset,
|
|
header->string_size, vNULL);
|
|
count = streamer_read_uhwi (&ib_main);
|
|
|
|
for (i = 0; i < count; i++)
|
|
{
|
|
unsigned int index;
|
|
struct cgraph_node *node;
|
|
lto_symtab_encoder_t encoder;
|
|
|
|
index = streamer_read_uhwi (&ib_main);
|
|
encoder = file_data->symtab_node_encoder;
|
|
node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
|
|
index));
|
|
gcc_assert (node->definition);
|
|
hsa_function_summary *s = hsa_summaries->get (node);
|
|
|
|
struct bitpack_d bp = streamer_read_bitpack (&ib_main);
|
|
s->m_kind = (hsa_function_kind) bp_unpack_value (&bp, 2);
|
|
s->m_gpu_implementation_p = bp_unpack_value (&bp, 1);
|
|
bool has_tree = bp_unpack_value (&bp, 1);
|
|
|
|
if (has_tree)
|
|
{
|
|
tree decl = stream_read_tree (&ib_main, data_in);
|
|
s->m_binded_function = cgraph_node::get_create (decl);
|
|
}
|
|
}
|
|
lto_free_section_data (file_data, LTO_section_ipa_hsa, NULL, data,
|
|
len);
|
|
lto_data_in_delete (data_in);
|
|
}
|
|
|
|
/* Load streamed HSA functions summary and assign the summary to a function. */
|
|
|
|
static void
|
|
ipa_hsa_read_summary (void)
|
|
{
|
|
struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
|
|
struct lto_file_decl_data *file_data;
|
|
unsigned int j = 0;
|
|
|
|
if (hsa_summaries == NULL)
|
|
hsa_summaries = new hsa_summary_t (symtab);
|
|
|
|
while ((file_data = file_data_vec[j++]))
|
|
{
|
|
size_t len;
|
|
const char *data = lto_get_section_data (file_data, LTO_section_ipa_hsa,
|
|
NULL, &len);
|
|
|
|
if (data)
|
|
ipa_hsa_read_section (file_data, data, len);
|
|
}
|
|
}
|
|
|
|
/* Static description of the IPA HSA pass: an IPA pass named "hsa" that is
   timed under TV_IPA_HSA and requests a symbol table dump when it
   finishes.  */

const pass_data pass_data_ipa_hsa =
{
  IPA_PASS,         /* type */
  "hsa",            /* name */
  OPTGROUP_NONE,    /* optinfo_flags */
  TV_IPA_HSA,       /* tv_id */
  0,                /* properties_required */
  0,                /* properties_provided */
  0,                /* properties_destroyed */
  0,                /* todo_flags_start */
  TODO_dump_symtab, /* todo_flags_finish */
};
|
|
|
|
class pass_ipa_hsa : public ipa_opt_pass_d
|
|
{
|
|
public:
|
|
pass_ipa_hsa (gcc::context *ctxt)
|
|
: ipa_opt_pass_d (pass_data_ipa_hsa, ctxt,
|
|
NULL, /* generate_summary */
|
|
ipa_hsa_write_summary, /* write_summary */
|
|
ipa_hsa_read_summary, /* read_summary */
|
|
ipa_hsa_write_summary, /* write_optimization_summary */
|
|
ipa_hsa_read_summary, /* read_optimization_summary */
|
|
NULL, /* stmt_fixup */
|
|
0, /* function_transform_todo_flags_start */
|
|
NULL, /* function_transform */
|
|
NULL) /* variable_transform */
|
|
{}
|
|
|
|
/* opt_pass methods: */
|
|
virtual bool gate (function *);
|
|
|
|
virtual unsigned int execute (function *) { return process_hsa_functions (); }
|
|
|
|
}; // class pass_ipa_reference
|
|
|
|
bool
|
|
pass_ipa_hsa::gate (function *)
|
|
{
|
|
return hsa_gen_requested_p ();
|
|
}
|
|
|
|
} // anon namespace
|
|
|
|
/* Allocate a new instance of the IPA HSA pass for context CTXT and return
   it.  */

ipa_opt_pass_d *
make_pass_ipa_hsa (gcc::context *ctxt)
{
  ipa_opt_pass_d *pass = new pass_ipa_hsa (ctxt);
  return pass;
}
|