diff --git a/gcc/brig/ChangeLog b/gcc/brig/ChangeLog index 69c57cbf80a..0225929c2ef 100644 --- a/gcc/brig/ChangeLog +++ b/gcc/brig/ChangeLog @@ -1,4 +1,29 @@ -2017-05-13 Pekka Jääskeläinen +2017-09-27 Pekka Jääskeläinen + + * brig-lang.c: Improved support for function and module scope + group segment variables. The PRM spec defines function and module + scope group segment variables as an experimental feature. However, + the PRM test suite uses them and hcc relies on them. In addition, hcc + assumes a certain group variable layout in its dynamic group segment + allocation code. We cannot have global group memory offsets if we + want to both have kernel-specific group segment size and multiple + kernels calling the same functions that use function scope group memory + variables. Now the group segment is handled by separate bookkeeping of + module scope and function (kernel) offsets. Each function has a "frame" + in the group segment, the offset to which is given as an argument. + * brigfrontend/brig-branch-inst-handler.cc: See above. + * brigfrontend/brig-code-entry-handler.cc: See above. + * brigfrontend/brig-fbarrier-handler.cc: See above. + * brigfrontend/brig-function-handler.cc: See above. + * brigfrontend/brig-function.cc: See above. + * brigfrontend/brig-function.h: See above. + * brigfrontend/brig-to-generic.cc: See above. + * brigfrontend/brig-to-generic.h: See above. + * brigfrontend/brig-util.cc: See above. + * brigfrontend/brig-util.h: See above. + * brigfrontend/brig-variable-handler.cc: See above. + +2017-09-25 Pekka Jääskeläinen * brigfrontend/brig-to-generic.cc: Ensure per WI copies of private variables are aligned too. 
diff --git a/gcc/brig/brig-lang.c b/gcc/brig/brig-lang.c index 13e738e1937..a587c8b6091 100644 --- a/gcc/brig/brig-lang.c +++ b/gcc/brig/brig-lang.c @@ -160,7 +160,7 @@ brig_langhook_post_options (const char **pfilename ATTRIBUTE_UNUSED) flag_excess_precision_cmdline = EXCESS_PRECISION_STANDARD; /* gccbrig casts pointers around like crazy, TBAA produces - broken code if not force disabling it. */ + broken code if not force disabling it. */ flag_strict_aliasing = 0; /* Returning false means that the backend should be used. */ @@ -182,6 +182,8 @@ brig_langhook_parse_file (void) { brig_to_generic brig_to_gen; + std::vector brig_blobs; + for (unsigned int i = 0; i < num_in_fnames; ++i) { @@ -194,11 +196,22 @@ brig_langhook_parse_file (void) error ("could not read the BRIG file"); exit (1); } - brig_to_gen.parse (brig_blob); fclose (f); + + brig_to_gen.analyze (brig_blob); + brig_blobs.push_back (brig_blob); + } + + for (size_t i = 0; i < brig_blobs.size(); ++i) + { + char *brig_blob = brig_blobs.at(i); + brig_to_gen.parse (brig_blob); } brig_to_gen.write_globals (); + + for (size_t i = 0; i < brig_blobs.size (); ++i) + delete brig_blobs[i]; } static tree diff --git a/gcc/brig/brigfrontend/brig-branch-inst-handler.cc b/gcc/brig/brigfrontend/brig-branch-inst-handler.cc index 9cec5b6d455..c8912dbccd7 100644 --- a/gcc/brig/brigfrontend/brig-branch-inst-handler.cc +++ b/gcc/brig/brigfrontend/brig-branch-inst-handler.cc @@ -117,8 +117,17 @@ brig_branch_inst_handler::operator () (const BrigBase *base) they might call builtins that need them or access group/private memory. */ + tree group_local_offset + = add_temp_var ("group_local_offset", + build_int_cst + (uint32_type_node, + m_parent.m_cf->m_local_group_variables.size())); + + /* TODO: ensure the callee's frame is aligned! 
*/ + vec_safe_push (in_args, m_parent.m_cf->m_context_arg); vec_safe_push (in_args, m_parent.m_cf->m_group_base_arg); + vec_safe_push (in_args, group_local_offset); vec_safe_push (in_args, m_parent.m_cf->m_private_base_arg); tree call = build_call_vec (ret_val_type, build_fold_addr_expr (func_ref), diff --git a/gcc/brig/brigfrontend/brig-code-entry-handler.cc b/gcc/brig/brigfrontend/brig-code-entry-handler.cc index 8f07d372796..a660739807e 100644 --- a/gcc/brig/brigfrontend/brig-code-entry-handler.cc +++ b/gcc/brig/brigfrontend/brig-code-entry-handler.cc @@ -88,10 +88,17 @@ brig_code_entry_handler::build_code_ref (const BrigBase &ref) { const BrigDirectiveFbarrier* fbar = (const BrigDirectiveFbarrier*)&ref; - uint64_t offset = m_parent.group_variable_segment_offset - (m_parent.get_mangled_name (fbar)); + std::string var_name = m_parent.get_mangled_name (fbar); + uint64_t offset + = m_parent.m_cf->group_variable_segment_offset (var_name); - return build_int_cst (uint32_type_node, offset); + tree local_offset = build_int_cst (uint32_type_node, offset); + if (m_parent.m_cf->m_local_group_variables.has_variable (var_name)) + local_offset + = build2 (PLUS_EXPR, uint64_type_node, local_offset, + convert (uint64_type_node, + m_parent.m_cf->m_group_local_offset_arg)); + return local_offset; } else gcc_unreachable (); @@ -264,9 +271,18 @@ brig_code_entry_handler::build_address_operand } else if (segment == BRIG_SEGMENT_GROUP) { - - uint64_t offset = m_parent.group_variable_segment_offset (var_name); + uint64_t offset + = m_parent.m_cf->group_variable_segment_offset (var_name); const_offset = build_int_cst (size_type_node, offset); + + /* If it's a local group variable reference, add the local + group segment offset to get the group base ptr offset. 
*/ + if (m_parent.m_cf->m_local_group_variables.has_variable (var_name)) + const_offset + = build2 (PLUS_EXPR, uint64_type_node, const_offset, + convert (uint64_type_node, + m_parent.m_cf->m_group_local_offset_arg)); + } else if (segment == BRIG_SEGMENT_PRIVATE || segment == BRIG_SEGMENT_SPILL) { diff --git a/gcc/brig/brigfrontend/brig-fbarrier-handler.cc b/gcc/brig/brigfrontend/brig-fbarrier-handler.cc index 802d51ce383..a033db6fc6d 100644 --- a/gcc/brig/brigfrontend/brig-fbarrier-handler.cc +++ b/gcc/brig/brigfrontend/brig-fbarrier-handler.cc @@ -39,6 +39,7 @@ brig_directive_fbarrier_handler::operator () (const BrigBase *base) if (m_parent.m_cf != NULL) m_parent.m_cf->m_function_scope_vars.insert (base); std::string var_name = m_parent.get_mangled_name (fbar); - m_parent.append_group_variable (var_name, FBARRIER_STRUCT_SIZE, 1); + m_parent.add_group_variable (var_name, FBARRIER_STRUCT_SIZE, 1, + m_parent.m_cf != NULL); return base->byteCount; } diff --git a/gcc/brig/brigfrontend/brig-function-handler.cc b/gcc/brig/brigfrontend/brig-function-handler.cc index ebfca3907c1..7896c4ac935 100644 --- a/gcc/brig/brigfrontend/brig-function-handler.cc +++ b/gcc/brig/brigfrontend/brig-function-handler.cc @@ -39,7 +39,8 @@ extern int gccbrig_verbose; size_t brig_directive_function_handler::operator () (const BrigBase *base) { - m_parent.finish_function (); + if (!m_parent.m_analyzing) + m_parent.finish_function (); size_t bytes_consumed = base->byteCount; @@ -64,9 +65,20 @@ brig_directive_function_handler::operator () (const BrigBase *base) if (is_kernel && !is_definition) return bytes_consumed; - m_parent.m_cf = new brig_function (exec, &m_parent); - std::string func_name = m_parent.get_mangled_name (exec); + if (is_kernel) + /* The generated kernel function is not the one that should be + called by the host. 
*/ + func_name = std::string ("_") + func_name; + + m_parent.m_cf = new brig_function (exec, &m_parent); + m_parent.m_cf->m_name = func_name; + m_parent.m_cf->m_is_kernel = is_kernel; + + /* During the analyze step, the above information is all we need per + function. */ + if (m_parent.m_analyzing) + return bytes_consumed; tree fndecl; tree ret_value = NULL_TREE; @@ -79,10 +91,6 @@ brig_directive_function_handler::operator () (const BrigBase *base) if (is_kernel) { - /* The generated kernel function is not the one that should be - called by the host. */ - func_name = std::string ("_") + func_name; - tree name_identifier = get_identifier_with_length (func_name.c_str (), func_name.size ()); @@ -256,6 +264,23 @@ brig_directive_function_handler::operator () (const BrigBase *base) DECL_ARTIFICIAL (group_base_arg) = 1; TREE_READONLY (group_base_arg) = 1; TREE_USED (group_base_arg) = 1; + m_parent.m_cf->m_group_base_arg = group_base_arg; + + /* To implement call stack and (non-kernel) function scope group variables, + we need to pass an offset which describes how far we are from + group_base_ptr. + That must be added to any function local group variable offsets to + get the address relative to the bottom of the group memory chunk. */ + tree group_local_offset_arg + = build_decl (UNKNOWN_LOCATION, PARM_DECL, + get_identifier ("__group_local_offset"), uint32_type_node); + chainon (DECL_ARGUMENTS (fndecl), group_local_offset_arg); + DECL_ARG_TYPE (group_local_offset_arg) = uint32_type_node; + DECL_CONTEXT (group_local_offset_arg) = fndecl; + DECL_ARTIFICIAL (group_local_offset_arg) = 1; + TREE_READONLY (group_local_offset_arg) = 1; + TREE_USED (group_local_offset_arg) = 1; + m_parent.m_cf->m_group_local_offset_arg = group_local_offset_arg; /* Same for private. 
*/ tree private_base_arg @@ -329,12 +354,9 @@ brig_directive_function_handler::operator () (const BrigBase *base) m_parent.start_function (fndecl); - m_parent.m_cf->m_name = func_name; m_parent.m_cf->m_func_decl = fndecl; m_parent.m_cf->m_current_bind_expr = bind_expr; - m_parent.m_cf->m_is_kernel = is_kernel; m_parent.m_cf->m_context_arg = context_arg; - m_parent.m_cf->m_group_base_arg = group_base_arg; m_parent.m_cf->m_private_base_arg = private_base_arg; if (ret_value != NULL_TREE && TREE_TYPE (ret_value) != void_type_node) diff --git a/gcc/brig/brigfrontend/brig-function.cc b/gcc/brig/brigfrontend/brig-function.cc index 0ca9ebe8b0e..f3c3895078a 100644 --- a/gcc/brig/brigfrontend/brig-function.cc +++ b/gcc/brig/brigfrontend/brig-function.cc @@ -52,11 +52,10 @@ brig_function::brig_function (const BrigDirectiveExecutable *exec, m_context_arg (NULL_TREE), m_group_base_arg (NULL_TREE), m_private_base_arg (NULL_TREE), m_ret_value (NULL_TREE), m_next_kernarg_offset (0), m_kernarg_max_align (0), - m_ret_value_brig_var (NULL), m_has_barriers (false), - m_has_allocas (false), m_has_function_calls_with_barriers (false), - m_calls_analyzed (false), m_is_wg_function (false), - m_has_unexpanded_dp_builtins (false), m_generating_arg_block (false), - m_parent (parent) + m_ret_value_brig_var (NULL), m_has_barriers (false), m_has_allocas (false), + m_has_function_calls_with_barriers (false), m_calls_analyzed (false), + m_is_wg_function (false), m_has_unexpanded_dp_builtins (false), + m_generating_arg_block (false), m_parent (parent) { memset (m_regs, 0, BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT * sizeof (BrigOperandRegister *)); @@ -577,20 +576,31 @@ brig_function::emit_launcher_and_metadata () tree phsail_launch_kernel_call; + /* Compute the local group segment frame start pointer. 
*/ + tree group_local_offset_temp + = create_tmp_var (uint32_type_node, "group_local_offset"); + tree group_local_offset_arg + = build2 (MODIFY_EXPR, uint32_type_node, + group_local_offset_temp, + build_int_cst (uint32_type_node, + m_parent->m_module_group_variables.size())); + /* Emit a launcher depending whether we converted the kernel function to a work group function or not. */ if (m_is_wg_function) phsail_launch_kernel_call = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_WG_FUNC), - 3, void_type_node, + 4, void_type_node, ptr_type_node, kernel_func_ptr, ptr_type_node, - context_arg, ptr_type_node, group_base_addr_arg); + context_arg, ptr_type_node, group_base_addr_arg, + uint32_type_node, group_local_offset_arg); else phsail_launch_kernel_call = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_KERNEL), - 3, void_type_node, + 4, void_type_node, ptr_type_node, kernel_func_ptr, ptr_type_node, - context_arg, ptr_type_node, group_base_addr_arg); + context_arg, ptr_type_node, group_base_addr_arg, + uint32_type_node, group_local_offset_arg); append_to_statement_list_force (phsail_launch_kernel_call, &stmt_list); @@ -722,3 +732,13 @@ brig_function::has_function_scope_var (const BrigBase* var) const { return m_function_scope_vars.find (var) != m_function_scope_vars.end (); } + +size_t +brig_function::group_variable_segment_offset (const std::string &name) const +{ + if (m_local_group_variables.has_variable (name)) + return m_local_group_variables.segment_offset (name); + + gcc_assert (m_parent->m_module_group_variables.has_variable (name)); + return m_parent->m_module_group_variables.segment_offset (name); +} diff --git a/gcc/brig/brigfrontend/brig-function.h b/gcc/brig/brigfrontend/brig-function.h index 71b5d3f996f..2a85f5e69fe 100644 --- a/gcc/brig/brigfrontend/brig-function.h +++ b/gcc/brig/brigfrontend/brig-function.h @@ -30,8 +30,7 @@ #include "tree.h" #include "tree-iterator.h" #include "hsa-brig-format.h" - -class brig_to_generic; 
+#include "brig-util.h" #include #include @@ -40,6 +39,8 @@ class brig_to_generic; #include "phsa.h" +class brig_to_generic; + typedef std::map label_index; typedef std::map variable_index; typedef std::vector tree_stl_vec; @@ -84,6 +85,12 @@ public: tree add_local_variable (std::string name, tree type); + size_t group_variable_segment_offset (const std::string &name) const; + + bool has_group_variable (const std::string &name) const; + + size_t group_segment_size () const; + tree get_m_var_declfor_reg (const BrigOperandRegister *reg); bool convert_to_wg_function (); @@ -119,10 +126,16 @@ public: /* The __context function argument. */ tree m_context_arg; + /* The __group_base_ptr argument in the current function. - Points to the start of the group segment for the kernel - instance. */ + Points to the start of the group segment for the work-group. */ tree m_group_base_arg; + + /* The __group_local_offset_ptr argument in the current function. It + contains the offset related to the group_base_ptr where the function's + local area for group variables resides. */ + tree m_group_local_offset_arg; + /* The __private_base_ptr argument in the current function. Points to the start of the private segment. */ tree m_private_base_arg; @@ -159,7 +172,7 @@ public: /* True if the function has at least one alloca instruction. */ bool m_has_allocas; - /* If the kernel containts at least one function call that _may_ + /* If the kernel contains at least one function call that _may_ contain a barrier call, this is set to true. */ bool m_has_function_calls_with_barriers; @@ -199,6 +212,10 @@ public: /* The functions called by this function. */ std::vector m_called_functions; + /* Stores the kernel scope group variable offsets if the function is + a kernel. 
*/ + group_variable_offset_index m_local_group_variables; + brig_to_generic *m_parent; /* The metadata of the function that should be stored with the binary and passed to the HSA runtime: */ diff --git a/gcc/brig/brigfrontend/brig-to-generic.cc b/gcc/brig/brigfrontend/brig-to-generic.cc index 2b1d94e0ff7..6459f9e1076 100644 --- a/gcc/brig/brigfrontend/brig-to-generic.cc +++ b/gcc/brig/brigfrontend/brig-to-generic.cc @@ -60,8 +60,8 @@ tree brig_to_generic::s_fp32_type; tree brig_to_generic::s_fp64_type; brig_to_generic::brig_to_generic () - : m_cf (NULL), m_brig (NULL), m_next_group_offset (0), - m_next_private_offset (0) + : m_cf (NULL), m_analyzing (true), m_total_group_segment_usage (0), + m_brig (NULL), m_next_private_offset (0) { m_globals = NULL_TREE; @@ -124,33 +124,32 @@ public: } }; -/* Parses the given BRIG blob. */ +/* Helper struct for pairing a BrigKind and a BrigCodeEntryHandler that + should handle its data. */ + +struct code_entry_handler_info +{ + BrigKind kind; + brig_code_entry_handler *handler; +}; + + +/* Finds the BRIG file sections in the currently processed file. */ void -brig_to_generic::parse (const char *brig_blob) +brig_to_generic::find_brig_sections () { - m_brig = brig_blob; - m_brig_blobs.push_back (brig_blob); - - const BrigModuleHeader *mheader = (const BrigModuleHeader *) brig_blob; - - if (strncmp (mheader->identification, "HSA BRIG", 8) != 0) - fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE - "Unrecognized file format."); - if (mheader->brigMajor != 1 || mheader->brigMinor != 0) - fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE - "BRIG version not supported. BRIG 1.0 required."); - m_data = m_code = m_operand = NULL; + const BrigModuleHeader *mheader = (const BrigModuleHeader *) m_brig; /* Find the positions of the different sections. 
*/ for (uint32_t sec = 0; sec < mheader->sectionCount; ++sec) { uint64_t offset - = ((const uint64_t *) (brig_blob + mheader->sectionIndex))[sec]; + = ((const uint64_t *) (m_brig + mheader->sectionIndex))[sec]; const BrigSectionHeader *section_header - = (const BrigSectionHeader *) (brig_blob + offset); + = (const BrigSectionHeader *) (m_brig + offset); std::string name ((const char *) (§ion_header->name), section_header->nameLength); @@ -183,6 +182,94 @@ brig_to_generic::parse (const char *brig_blob) if (m_operand == NULL) gcc_unreachable (); +} + +/* Does a first pass over the given BRIG to collect data needed for the + actual parsing. Currently this includes only collecting the + group segment variable usage to support the experimental HSA PRM feature + where group variables can be declared also in module and function scope + (in addition to kernel scope). +*/ + +void +brig_to_generic::analyze (const char *brig_blob) +{ + const BrigModuleHeader *mheader = (const BrigModuleHeader *) brig_blob; + + if (strncmp (mheader->identification, "HSA BRIG", 8) != 0) + fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE + "Unrecognized file format."); + if (mheader->brigMajor != 1 || mheader->brigMinor != 0) + fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE + "BRIG version not supported. BRIG 1.0 required."); + + m_brig = brig_blob; + + find_brig_sections (); + + brig_directive_variable_handler var_handler (*this); + brig_directive_fbarrier_handler fbar_handler (*this); + brig_directive_function_handler func_handler (*this); + + /* Need this for grabbing the module names for mangling the + group variable names. 
*/ + brig_directive_module_handler module_handler (*this); + skipped_entry_handler skipped_handler (*this); + + const BrigSectionHeader *csection_header = (const BrigSectionHeader *) m_code; + + code_entry_handler_info handlers[] + = {{BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler}, + {BRIG_KIND_DIRECTIVE_FBARRIER, &fbar_handler}, + {BRIG_KIND_DIRECTIVE_KERNEL, &func_handler}, + {BRIG_KIND_DIRECTIVE_MODULE, &module_handler}, + {BRIG_KIND_DIRECTIVE_FUNCTION, &func_handler}}; + + m_analyzing = true; + for (size_t b = csection_header->headerByteCount; b < m_code_size;) + { + const BrigBase *entry = (const BrigBase *) (m_code + b); + + brig_code_entry_handler *handler = &skipped_handler; + + if (m_cf != NULL && b >= m_cf->m_brig_def->nextModuleEntry) + { + /* The function definition ended. We can just discard the place + holder function. */ + m_total_group_segment_usage += m_cf->m_local_group_variables.size (); + delete m_cf; + m_cf = NULL; + } + + /* Find a handler. */ + for (size_t i = 0; + i < sizeof (handlers) / sizeof (code_entry_handler_info); ++i) + { + if (handlers[i].kind == entry->kind) + handler = handlers[i].handler; + } + b += (*handler) (entry); + } + + if (m_cf != NULL) + { + m_total_group_segment_usage += m_cf->m_local_group_variables.size (); + delete m_cf; + m_cf = NULL; + } + + m_total_group_segment_usage += m_module_group_variables.size (); + m_analyzing = false; +} + +/* Parses the given BRIG blob. 
*/ + +void +brig_to_generic::parse (const char *brig_blob) +{ + m_brig = brig_blob; + find_brig_sections (); + brig_basic_inst_handler inst_handler (*this); brig_branch_inst_handler branch_inst_handler (*this); brig_cvt_inst_handler cvt_inst_handler (*this); @@ -269,7 +356,6 @@ brig_to_generic::parse (const char *brig_blob) handler = handlers[i].handler; } b += (*handler) (entry); - continue; } finish_function (); @@ -519,6 +605,29 @@ brig_to_generic::get_finished_function (tree func_decl) return NULL; } +/* Adds a group variable to a correct book keeping structure depending + on its segment. */ + +void +brig_to_generic::add_group_variable (const std::string &name, size_t size, + size_t alignment, bool function_scope) +{ + /* Module and function scope group region variables are an experimental + feature. We implement module scope group variables with a separate + book keeping inside brig_to_generic which is populated in the 'analyze()' + prepass. This is to ensure we know the group segment offsets when + processing the functions that might refer to them. */ + if (!function_scope) + { + if (!m_module_group_variables.has_variable (name)) + m_module_group_variables.add (name, size, alignment); + return; + } + + if (!m_cf->m_local_group_variables.has_variable (name)) + m_cf->m_local_group_variables.add (name, size, alignment); +} + /* Finalizes the currently handled function. Should be called before setting a new function. */ @@ -567,38 +676,6 @@ brig_to_generic::start_function (tree f) m_cf->m_func_decl = f; } -/* Appends a new group variable (or an fbarrier) to the current kernel's - group segment. */ - -void -brig_to_generic::append_group_variable (const std::string &name, size_t size, - size_t alignment) -{ - size_t align_padding = m_next_group_offset % alignment == 0 ? 
- 0 : (alignment - m_next_group_offset % alignment); - m_next_group_offset += align_padding; - m_group_offsets[name] = m_next_group_offset; - m_next_group_offset += size; -} - -size_t -brig_to_generic::group_variable_segment_offset (const std::string &name) const -{ - var_offset_table::const_iterator i = m_group_offsets.find (name); - gcc_assert (i != m_group_offsets.end ()); - return (*i).second; -} - -/* The size of the group and private segments required by the currently - processed kernel. Private segment size must be multiplied by the - number of work-items in the launch, in case of a work-group function. */ - -size_t -brig_to_generic::group_segment_size () const -{ - return m_next_group_offset; -} - /* Appends a new variable to the current kernel's private segment. */ void @@ -646,13 +723,6 @@ brig_to_generic::has_private_variable (const std::string &name) const return i != m_private_data_sizes.end (); } -bool -brig_to_generic::has_group_variable (const std::string &name) const -{ - var_offset_table::const_iterator i = m_group_offsets.find (name); - return i != m_group_offsets.end (); -} - size_t brig_to_generic::private_variable_size (const std::string &name) const { @@ -662,6 +732,10 @@ brig_to_generic::private_variable_size (const std::string &name) const return (*i).second; } + +/* The size of private segment required by a single work-item executing + the currently processed kernel. */ + size_t brig_to_generic::private_segment_size () const { @@ -735,10 +809,11 @@ brig_to_generic::write_globals () cgraph_node::finalize_function (f->m_func_decl, true); f->m_descriptor.is_kernel = 1; - /* TODO: analyze the kernel's actual group and private segment usage - using a call graph. Now the private and group mem sizes are overly - pessimistic in case of multiple kernels in the same module. */ - f->m_descriptor.group_segment_size = group_segment_size (); + /* TODO: analyze the kernel's actual private and group segment usage + using call graph. 
Now the mem size is overly + pessimistic in case of multiple kernels in the same module. + */ + f->m_descriptor.group_segment_size = m_total_group_segment_usage; f->m_descriptor.private_segment_size = private_segment_size (); /* The kernarg size is rounded up to a multiple of 16 according to @@ -774,8 +849,6 @@ brig_to_generic::write_globals () delete[] vec; - for (size_t i = 0; i < m_brig_blobs.size (); ++i) - delete m_brig_blobs[i]; } /* Returns an type with unsigned int elements corresponding to the diff --git a/gcc/brig/brigfrontend/brig-to-generic.h b/gcc/brig/brigfrontend/brig-to-generic.h index b94ff7cf57b..0070894dd26 100644 --- a/gcc/brig/brigfrontend/brig-to-generic.h +++ b/gcc/brig/brigfrontend/brig-to-generic.h @@ -36,7 +36,6 @@ #include "hsa-brig-format.h" #include "brig-function.h" - struct reg_decl_index_entry; /* Converts an HSAIL BRIG input to GENERIC. This class holds global state @@ -56,6 +55,7 @@ private: public: brig_to_generic (); + void analyze (const char *brig_blob); void parse (const char *brig_blob); void write_globals (); @@ -78,17 +78,9 @@ public: void start_function (tree f); void finish_function (); - void append_group_variable (const std::string &name, size_t size, - size_t alignment); - void append_private_variable (const std::string &name, size_t size, size_t alignment); - size_t group_variable_segment_offset (const std::string &name) const; - - bool - has_group_variable (const std::string &name) const; - size_t private_variable_segment_offset (const std::string &name) const; @@ -107,11 +99,13 @@ public: { return get_mangled_name_tmpl (var); } std::string get_mangled_name (const BrigDirectiveExecutable *func) const; - size_t group_segment_size () const; size_t private_segment_size () const; brig_function *get_finished_function (tree func_decl); + void add_group_variable (const std::string &name, size_t size, + size_t alignment, bool function_scope); + static tree s_fp16_type; static tree s_fp32_type; static tree s_fp64_type; @@ 
-123,10 +117,21 @@ public: /* The currently built function. */ brig_function *m_cf; + /* Stores the module and function scope group variable offsets. */ + group_variable_offset_index m_module_group_variables; + /* The name of the currently handled BRIG module. */ std::string m_module_name; + /* Set to true if the compilation is in 'analyze' phase. */ + bool m_analyzing; + + /* Accumulates the total group segment usage. */ + size_t m_total_group_segment_usage; + private: + + void find_brig_sections (); /* The BRIG blob and its different sections of the file currently being parsed. */ const char *m_brig; @@ -144,10 +149,6 @@ private: /* The size of each private variable, including the alignment padding. */ std::map m_private_data_sizes; - /* The same for group variables. */ - size_t m_next_group_offset; - var_offset_table m_group_offsets; - /* And private. */ size_t m_next_private_offset; var_offset_table m_private_offsets; @@ -162,9 +163,6 @@ private: for some interprocedural analysis. */ std::map m_finished_functions; - /* The parsed BRIG blobs. Owned and will be deleted after use. */ - std::vector m_brig_blobs; - /* The original dump file. */ FILE *m_dump_file; diff --git a/gcc/brig/brigfrontend/brig-util.cc b/gcc/brig/brigfrontend/brig-util.cc index f96ae6ab5e6..a8684de9131 100644 --- a/gcc/brig/brigfrontend/brig-util.cc +++ b/gcc/brig/brigfrontend/brig-util.cc @@ -27,6 +27,34 @@ along with GCC; see the file COPYING3. If not see #include "errors.h" #include "diagnostic-core.h" +bool +group_variable_offset_index::has_variable (const std::string &name) const +{ + varname_offset_table::const_iterator i = m_group_offsets.find (name); + return i != m_group_offsets.end (); +} + +/* Adds a new group segment variable. */ + +void +group_variable_offset_index::add (const std::string &name, size_t size, + size_t alignment) +{ + size_t align_padding = m_next_group_offset % alignment == 0 ? 
+ 0 : (alignment - m_next_group_offset % alignment); + m_next_group_offset += align_padding; + m_group_offsets[name] = m_next_group_offset; + m_next_group_offset += size; +} + +size_t +group_variable_offset_index::segment_offset (const std::string &name) const +{ + varname_offset_table::const_iterator i = m_group_offsets.find (name); + gcc_assert (i != m_group_offsets.end ()); + return (*i).second; +} + /* Return true if operand number OPNUM of instruction with OPCODE is an output. False if it is an input. Some code reused from Martin Jambor's gcc-hsa tree. */ diff --git a/gcc/brig/brigfrontend/brig-util.h b/gcc/brig/brigfrontend/brig-util.h index 3060f5b87f9..c90ff29d0fd 100644 --- a/gcc/brig/brigfrontend/brig-util.h +++ b/gcc/brig/brigfrontend/brig-util.h @@ -22,7 +22,33 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_BRIG_UTIL_H #define GCC_BRIG_UTIL_H -#include "brig-to-generic.h" +#include + +#include "config.h" +#include "system.h" +#include "ansidecl.h" +#include "coretypes.h" +#include "opts.h" +#include "tree.h" + +/* Helper class for keeping book of group variable offsets. 
*/ + +class group_variable_offset_index +{ +public: + group_variable_offset_index () : m_next_group_offset (0) {} + + typedef std::map varname_offset_table; + + bool has_variable (const std::string &name) const; + void add (const std::string &name, size_t size, size_t alignment); + size_t segment_offset (const std::string &name) const; + size_t size () const { return m_next_group_offset; } + +private: + size_t m_next_group_offset; + varname_offset_table m_group_offsets; +}; bool gccbrig_hsa_opcode_op_output_p (BrigOpcode16_t opcode, int opnum); diff --git a/gcc/brig/brigfrontend/brig-variable-handler.cc b/gcc/brig/brigfrontend/brig-variable-handler.cc index b2e869b7ed5..cd0e98107f5 100644 --- a/gcc/brig/brigfrontend/brig-variable-handler.cc +++ b/gcc/brig/brigfrontend/brig-variable-handler.cc @@ -144,10 +144,25 @@ brig_directive_variable_handler::operator () (const BrigBase *base) size_t alignment = get_brig_var_alignment (brigVar); - if (m_parent.m_cf != NULL) + bool function_scope = m_parent.m_cf != NULL; + + if (function_scope) m_parent.m_cf->m_function_scope_vars.insert (base); std::string var_name = m_parent.get_mangled_name (brigVar); + if (brigVar->segment == BRIG_SEGMENT_GROUP) + { + /* Non-kernel scope group variables have been added at the + 'analyze' stage. */ + m_parent.add_group_variable (var_name, var_size, alignment, + function_scope); + return base->byteCount; + } + + /* During analyze, handle only (module scope) group variables. 
*/ + if (m_parent.m_analyzing) + return base->byteCount; + if (brigVar->segment == BRIG_SEGMENT_KERNARG) { /* Do not create a real variable, but only a table of @@ -158,18 +173,6 @@ brig_directive_variable_handler::operator () (const BrigBase *base) m_parent.m_cf->append_kernel_arg (brigVar, var_size, alignment); return base->byteCount; } - else if (brigVar->segment == BRIG_SEGMENT_GROUP) - { - /* Handle group region variables similarly as kernargs: - assign offsets to the group region on the fly when - a new module scope or function scope group variable is - introduced. These offsets will be then added to the - group_base hidden pointer passed to the kernel in order to - get the flat address. */ - if (!m_parent.has_group_variable (var_name)) - m_parent.append_group_variable (var_name, var_size, alignment); - return base->byteCount; - } else if (brigVar->segment == BRIG_SEGMENT_PRIVATE || brigVar->segment == BRIG_SEGMENT_SPILL) { diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 39682ce7259..9e67ac34e58 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2017-09-27 Pekka Jääskeläinen + + * brig.dg/test/gimple/fbarrier.hsail: Fixed tests to match the + new group memory offsetting code in the BRIG frontend. + * brig.dg/test/gimple/function_calls.hsail: Likewise. + * brig.dg/test/gimple/smoke_test.hsail: Likewise. + * brig.dg/test/gimple/variables.hsail: Likewise. 
+ 2017-09-27 Jakub Jelinek PR c++/82159 diff --git a/gcc/testsuite/brig.dg/test/gimple/fbarrier.hsail b/gcc/testsuite/brig.dg/test/gimple/fbarrier.hsail index a58ca0964ad..9efe0271571 100644 --- a/gcc/testsuite/brig.dg/test/gimple/fbarrier.hsail +++ b/gcc/testsuite/brig.dg/test/gimple/fbarrier.hsail @@ -65,10 +65,10 @@ prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) /* { dg-final { scan-tree-dump "__hsail_waitfbar \\\(0, __context\\\);" "gimple"} } */ /* { dg-final { scan-tree-dump "__hsail_initfbar \\\(0, __context\\\);" "gimple"} } */ -/* { dg-final { scan-tree-dump "__hsail_initfbar \\\(32, __context\\\);" "gimple"} } */ +/* { dg-final { scan-tree-dump "__hsail_initfbar \\\(__group_local_offset, __context\\\);" "gimple"} } */ /* { dg-final { scan-tree-dump "__hsail_joinfbar \\\(0, __context\\\);" "gimple"} } */ -/* { dg-final { scan-tree-dump "@skip_init:\[\n ]+__builtin___hsail_barrier \\\(__context\\\);\[\n ]+__builtin___hsail_joinfbar \\\(32, __context\\\);" "gimple"} } */ +/* { dg-final { scan-tree-dump "@skip_init:\[\n ]+__builtin___hsail_barrier \\\(__context\\\);\[\n ]+__builtin___hsail_joinfbar \\\(__group_local_offset, __context\\\);" "gimple"} } */ -/* { dg-final { scan-tree-dump "__hsail_arrivefbar \\\(32, __context\\\);" "gimple"} } */ +/* { dg-final { scan-tree-dump "__hsail_arrivefbar \\\(__group_local_offset, __context\\\);" "gimple"} } */ -/* { dg-final { scan-tree-dump "__hsail_releasefbar \\\(0, __context\\\);\[\n ]+__builtin___hsail_releasefbar \\\(32, __context\\\);" "gimple"} } */ +/* { dg-final { scan-tree-dump "__hsail_releasefbar \\\(0, __context\\\);\[\n ]+__builtin___hsail_releasefbar \\\(__group_local_offset, __context\\\);" "gimple"} } */ diff --git a/gcc/testsuite/brig.dg/test/gimple/function_calls.hsail b/gcc/testsuite/brig.dg/test/gimple/function_calls.hsail index d3b690cd37a..50f79060b59 100644 --- a/gcc/testsuite/brig.dg/test/gimple/function_calls.hsail +++ 
b/gcc/testsuite/brig.dg/test/gimple/function_calls.hsail @@ -46,7 +46,7 @@ prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) /* The generated function call should have the incoming arguments and three hidden arguments. */ -/* { dg-final { scan-tree-dump "_\[0-9\]+ = subfunction \\\(_kernel.float_arg.\[_0-9\]+, _kernel.double_arg.\[_0-9\]+, _kernel.half_arg.\[_0-9\]+, __context, __group_base_addr, __private_base_addr\\\);" "gimple"} } */ +/* { dg-final { scan-tree-dump "_\[0-9\]+ = subfunction \\\(_kernel.float_arg.\[_0-9\]+, _kernel.double_arg.\[_0-9\]+, _kernel.half_arg.\[_0-9\]+, __context, __group_base_addr, group_local_offset.*, __private_base_addr\\\);" "gimple"} } */ /* The callee should refer directly to the scalar arguments when it reads them. */ /* { dg-final { scan-tree-dump "= float_arg;" "gimple"} } */ diff --git a/gcc/testsuite/brig.dg/test/gimple/smoke_test.hsail b/gcc/testsuite/brig.dg/test/gimple/smoke_test.hsail index 850aeeb6a84..1f36ddc4181 100644 --- a/gcc/testsuite/brig.dg/test/gimple/smoke_test.hsail +++ b/gcc/testsuite/brig.dg/test/gimple/smoke_test.hsail @@ -42,7 +42,7 @@ prog kernel &KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) /* The kernel function itself should have a fingerprint as follows */ /* _Kernel (unsigned char * __args, void * __context, void * __group_base_addr, void * __private_base_addr) */ -/* { dg-final { scan-tree-dump "_Kernel \\\(unsigned char \\\* __args, void \\\* __context, void \\\* __group_base_addr, void \\\* __private_base_addr\\\)" "gimple"} } */ +/* { dg-final { scan-tree-dump "_Kernel \\\(unsigned char \\\* __args, void \\\* __context, void \\\* __group_base_addr, unsigned int __group_local_offset, void \\\* __private_base_addr\\\)" "gimple"} } */ /* ld_kernarg: mem_read.0 = MEM[(unsigned long *)__args]; */ /* { dg-final { scan-tree-dump "mem_read.\[0-9\] = MEM\\\[\\\(unsigned long \\\*\\\)__args\\\];" "gimple"} } */ @@ -73,7 +73,7 @@ prog kernel 
&KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) /* The launcher should call __hsail_launch_wg_function in this case: */ /* Kernel (void * __context, void * __group_base_addr) */ /* { dg-final { scan-tree-dump "Kernel \\\(void \\\* __context, void \\\* __group_base_addr\\\)" "gimple"} } */ -/* { dg-final { scan-tree-dump "__hsail_launch_wg_function \\\(_Kernel, __context, __group_base_addr\\\);" "gimple"} }*/ +/* { dg-final { scan-tree-dump "__hsail_launch_wg_function \\\(_Kernel, __context, __group_base_addr, group_local_offset.*\\\);" "gimple"} }*/ /* The kernel should have the magic metadata section injected to the ELF. */ /* TODO: this should be disabled in case not outputting to an ELF. */ @@ -85,7 +85,7 @@ prog kernel &KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) /* { dg-final { scan-tree-dump "s2 = s0 \\\+ s1;\[\n \]+__builtin___hsail_barrier \\\(__context\\\);\[\n \]+s3 = s0 \\\+ 4294967295;" "gimple"} } */ /* The kernel with the barrier call's launcher function should call the thread-spawning function. */ -/* { dg-final { scan-tree-dump "__hsail_launch_kernel \\\(_KernelWithBarrier, __context, __group_base_addr\\\);" "gimple" } } */ +/* { dg-final { scan-tree-dump "__hsail_launch_kernel \\\(_KernelWithBarrier, __context, __group_base_addr, group_local_offset.*\\\);" "gimple" } } */ diff --git a/gcc/testsuite/brig.dg/test/gimple/variables.hsail b/gcc/testsuite/brig.dg/test/gimple/variables.hsail index c76ea606575..5fd96c1c7bd 100644 --- a/gcc/testsuite/brig.dg/test/gimple/variables.hsail +++ b/gcc/testsuite/brig.dg/test/gimple/variables.hsail @@ -3,7 +3,7 @@ module &module:1:0:$full:$large:$default; /* Tests for different variable scopes and address spaces. 
*/ /* { dg-do compile } */ -/* { dg-options "-fdump-tree-gimple" } */ +/* { dg-options "-fdump-tree-gimple -fdump-tree-original" } */ prog align(256) private_u32 &prog_private; private_u32 &mod_private; @@ -29,7 +29,10 @@ prog function &subfunction(arg_u32 %return_value)(arg_u32 %arg) { ld_private_u32 $s200, [%func_private]; st_private_u32 $s200, [&prog_private]; +/* { dg-final { scan-tree-dump "__group_base_addr \\\+ \\\(0 \\\+" "original" } } */ ld_group_u32 $s203, [%func_group]; + +/* { dg-final { scan-tree-dump "__group_base_addr \\\+ 0" "original" } } */ st_group_u32 $s203, [&prog_group]; ld_global_u32 $s204, [%func_global]; @@ -104,8 +107,6 @@ prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) kern_group @12 (3) */ -/* { dg-final { scan-tree-dump "\\\+ 8;.*\\\+ 12;.*\\\+ 4;" "gimple" } } */ - /* The "mangling" of the global and readonly vars. */ /* { dg-final { scan-tree-dump "\[ \]*prog_global = s204;" "gimple" } } */ diff --git a/libhsail-rt/ChangeLog b/libhsail-rt/ChangeLog index bf86278584d..53d3634a640 100644 --- a/libhsail-rt/ChangeLog +++ b/libhsail-rt/ChangeLog @@ -1,7 +1,15 @@ +2017-09-27 Pekka Jääskeläinen + + * include/internal/phsa-rt.h: Support for improved group segment + handling with a stack-like allocation scheme. + * include/internal/workitems.h: Likewise. + * rt/workitems.c: Likewise. + 2017-09-25 Pekka Jääskeläinen * rt/workitems.c: Assume the host runtime allocates the work group memory. + 2017-05-03 Pekka Jääskeläinen * rt/workitems.c: Removed a leftover comment. diff --git a/libhsail-rt/include/internal/phsa-rt.h b/libhsail-rt/include/internal/phsa-rt.h index d47cbfcd3b6..13349e7fdbc 100644 --- a/libhsail-rt/include/internal/phsa-rt.h +++ b/libhsail-rt/include/internal/phsa-rt.h @@ -42,7 +42,8 @@ typedef void (*gccbrigKernelLauncherFunc) (void *context, void *); /* Pointer type for kernel functions produced by gccbrig from the HSAIL. This is private from outside the device binary and only called by the launcher. 
*/ -typedef void (*gccbrigKernelFunc) (unsigned char *, void *, void *, void *); +typedef void (*gccbrigKernelFunc) (unsigned char *, void *, void *, uint32_t, + void *); /* Context data that is passed to the kernel function, initialized by the runtime to the current launch information. The data is diff --git a/libhsail-rt/include/internal/workitems.h b/libhsail-rt/include/internal/workitems.h index e7d386d32ec..2abfc61d867 100644 --- a/libhsail-rt/include/internal/workitems.h +++ b/libhsail-rt/include/internal/workitems.h @@ -63,6 +63,11 @@ typedef struct to the work-group. */ void *group_base_ptr; + /* The offset in the group memory for the kernel local group variables. + To support module scope group variables, there might be a need to preserve + room for them in the beginning of the group segment. */ + uint32_t initial_group_offset; + /* Similarly to the private segment that gets space allocated for all WIs in the work-group. */ void *private_base_ptr; diff --git a/libhsail-rt/rt/workitems.c b/libhsail-rt/rt/workitems.c index ed1185a5780..b24fc108357 100644 --- a/libhsail-rt/rt/workitems.c +++ b/libhsail-rt/rt/workitems.c @@ -113,7 +113,7 @@ phsa_work_item_thread (int arg0, int arg1) && wi->z < __hsail_currentworkgroupsize (2, wi)) { l_data->kernel (l_data->kernarg_addr, wi, wg->group_base_ptr, - wg->private_base_ptr); + wg->initial_group_offset, wg->private_base_ptr); #ifdef DEBUG_PHSA_RT printf ("done.\n"); #endif @@ -221,7 +221,8 @@ phsa_work_item_thread (int arg0, int arg1) static void phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr, - size_t wg_size_x, size_t wg_size_y, size_t wg_size_z) + uint32_t group_local_offset, size_t wg_size_x, + size_t wg_size_y, size_t wg_size_z) { PHSAWorkItem *wi_threads = NULL; PHSAWorkGroup wg; @@ -247,6 +248,7 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr, wg.alloca_stack_p = wg.private_segment_total_size; wg.alloca_frame_p = wg.alloca_stack_p; + wg.initial_group_offset = 
group_local_offset; #ifdef EXECUTE_WGS_BACKWARDS wg.x = context->wg_max_x - 1; @@ -313,7 +315,8 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr, them execute all the WGs, including a potential partial WG. */ static void -phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr) +phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr, + uint32_t group_local_offset) { hsa_kernel_dispatch_packet_t *dp = context->dp; size_t x, y, z; @@ -361,8 +364,8 @@ phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr) dp->grid_size_y, dp->grid_size_z); #endif - phsa_execute_wi_gang (context, group_base_ptr, sat_wg_size_x, sat_wg_size_y, - sat_wg_size_z); + phsa_execute_wi_gang (context, group_base_ptr, group_local_offset, + sat_wg_size_x, sat_wg_size_y, sat_wg_size_z); } #endif @@ -374,7 +377,8 @@ phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr) execute massive numbers of work-items in a non-SPMD machine than fibers (easily 100x faster). 
*/ static void -phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr) +phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr, + uint32_t group_local_offset) { hsa_kernel_dispatch_packet_t *dp = context->dp; size_t x, y, z, wg_x, wg_y, wg_z; @@ -462,7 +466,7 @@ phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr) wi.wg->z = wg_z; context->kernel (context->kernarg_addr, &wi, group_base_ptr, - private_base_ptr); + group_local_offset, private_base_ptr); #if defined (BENCHMARK_PHSA_RT) wg_count++; @@ -527,19 +531,20 @@ phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr) void __hsail_launch_kernel (gccbrigKernelFunc kernel, PHSAKernelLaunchData *context, - void *group_base_ptr) + void *group_base_ptr, uint32_t group_local_offset) { context->kernel = kernel; - phsa_spawn_work_items (context, group_base_ptr); + phsa_spawn_work_items (context, group_base_ptr, group_local_offset); } #endif void __hsail_launch_wg_function (gccbrigKernelFunc kernel, - PHSAKernelLaunchData *context, void *group_base_ptr) + PHSAKernelLaunchData *context, void *group_base_ptr, + uint32_t group_local_offset) { context->kernel = kernel; - phsa_execute_work_groups (context, group_base_ptr); + phsa_execute_work_groups (context, group_base_ptr, group_local_offset); } uint32_t