[BRIGFE] Improved support for function and module scope group

segment variables.
    
The PRM spec defines function and module scope group segment variables
as an experimental feature. However, the PRM test suite uses them and
hcc relies on them. In addition, hcc assumes a certain group variable
layout in its dynamic group segment allocation code.
    
We cannot have global group memory offsets if we want to
both have kernel-specific group segment size and multiple kernels
calling the same functions that use function scope group memory
variables.
    
Now the group segment is handled by separate bookkeeping of module
scope and function (kernel) scope offsets. Each function has a "frame"
in the group segment, the offset to which is passed as an argument.

From-SVN: r253233
This commit is contained in:
Pekka Jääskeläinen 2017-09-27 15:40:24 +00:00 committed by Pekka Jääskeläinen
parent 6f0e6f0868
commit d4b7f2ee4b
22 changed files with 430 additions and 151 deletions

View File

@ -1,4 +1,29 @@
2017-05-13 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
2017-09-27 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
* brig-lang.c: Improved support for function and module scope
group segment variables. The PRM spec defines function and module
scope group segment variables as an experimental feature. However,
the PRM test suite uses them and hcc relies on them. In addition, hcc
assumes a certain group variable layout in its dynamic group segment
allocation code. We cannot have global group memory offsets if we
want to both have kernel-specific group segment size and multiple
kernels calling the same functions that use function scope group memory
variables. Now the group segment is handled by separate bookkeeping of
module scope and function (kernel) scope offsets. Each function has a
"frame" in the group segment, the offset to which is passed as an argument.
* brigfrontend/brig-branch-inst-handler.cc: See above.
* brigfrontend/brig-code-entry-handler.cc: See above.
* brigfrontend/brig-fbarrier-handler.cc: See above.
* brigfrontend/brig-function-handler.cc: See above.
* brigfrontend/brig-function.cc: See above.
* brigfrontend/brig-function.h: See above.
* brigfrontend/brig-to-generic.cc: See above.
* brigfrontend/brig-to-generic.h: See above.
* brigfrontend/brig-util.cc: See above.
* brigfrontend/brig-util.h: See above.
* brigfrontend/brig-variable-handler.cc: See above.
2017-09-25 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
* brigfrontend/brig-to-generic.cc: Ensure per WI copies of
private variables are aligned too.

View File

@ -160,7 +160,7 @@ brig_langhook_post_options (const char **pfilename ATTRIBUTE_UNUSED)
flag_excess_precision_cmdline = EXCESS_PRECISION_STANDARD;
/* gccbrig casts pointers around like crazy, TBAA produces
broken code if not force disabling it. */
broken code if not force disabling it. */
flag_strict_aliasing = 0;
/* Returning false means that the backend should be used. */
@ -182,6 +182,8 @@ brig_langhook_parse_file (void)
{
brig_to_generic brig_to_gen;
std::vector <char*> brig_blobs;
for (unsigned int i = 0; i < num_in_fnames; ++i)
{
@ -194,11 +196,22 @@ brig_langhook_parse_file (void)
error ("could not read the BRIG file");
exit (1);
}
brig_to_gen.parse (brig_blob);
fclose (f);
brig_to_gen.analyze (brig_blob);
brig_blobs.push_back (brig_blob);
}
for (size_t i = 0; i < brig_blobs.size(); ++i)
{
char *brig_blob = brig_blobs.at(i);
brig_to_gen.parse (brig_blob);
}
brig_to_gen.write_globals ();
for (size_t i = 0; i < brig_blobs.size (); ++i)
delete brig_blobs[i];
}
static tree

View File

@ -117,8 +117,17 @@ brig_branch_inst_handler::operator () (const BrigBase *base)
they might call builtins that need them or access group/private
memory. */
tree group_local_offset
= add_temp_var ("group_local_offset",
build_int_cst
(uint32_type_node,
m_parent.m_cf->m_local_group_variables.size()));
/* TODO: ensure the callee's frame is aligned! */
vec_safe_push (in_args, m_parent.m_cf->m_context_arg);
vec_safe_push (in_args, m_parent.m_cf->m_group_base_arg);
vec_safe_push (in_args, group_local_offset);
vec_safe_push (in_args, m_parent.m_cf->m_private_base_arg);
tree call = build_call_vec (ret_val_type, build_fold_addr_expr (func_ref),

View File

@ -88,10 +88,17 @@ brig_code_entry_handler::build_code_ref (const BrigBase &ref)
{
const BrigDirectiveFbarrier* fbar = (const BrigDirectiveFbarrier*)&ref;
uint64_t offset = m_parent.group_variable_segment_offset
(m_parent.get_mangled_name (fbar));
std::string var_name = m_parent.get_mangled_name (fbar);
uint64_t offset
= m_parent.m_cf->group_variable_segment_offset (var_name);
return build_int_cst (uint32_type_node, offset);
tree local_offset = build_int_cst (uint32_type_node, offset);
if (m_parent.m_cf->m_local_group_variables.has_variable (var_name))
local_offset
= build2 (PLUS_EXPR, uint64_type_node, local_offset,
convert (uint64_type_node,
m_parent.m_cf->m_group_local_offset_arg));
return local_offset;
}
else
gcc_unreachable ();
@ -264,9 +271,18 @@ brig_code_entry_handler::build_address_operand
}
else if (segment == BRIG_SEGMENT_GROUP)
{
uint64_t offset = m_parent.group_variable_segment_offset (var_name);
uint64_t offset
= m_parent.m_cf->group_variable_segment_offset (var_name);
const_offset = build_int_cst (size_type_node, offset);
/* If it's a local group variable reference, add the function's local
group segment offset to get the offset from the group base ptr. */
if (m_parent.m_cf->m_local_group_variables.has_variable (var_name))
const_offset
= build2 (PLUS_EXPR, uint64_type_node, const_offset,
convert (uint64_type_node,
m_parent.m_cf->m_group_local_offset_arg));
}
else if (segment == BRIG_SEGMENT_PRIVATE || segment == BRIG_SEGMENT_SPILL)
{

View File

@ -39,6 +39,7 @@ brig_directive_fbarrier_handler::operator () (const BrigBase *base)
if (m_parent.m_cf != NULL)
m_parent.m_cf->m_function_scope_vars.insert (base);
std::string var_name = m_parent.get_mangled_name (fbar);
m_parent.append_group_variable (var_name, FBARRIER_STRUCT_SIZE, 1);
m_parent.add_group_variable (var_name, FBARRIER_STRUCT_SIZE, 1,
m_parent.m_cf != NULL);
return base->byteCount;
}

View File

@ -39,7 +39,8 @@ extern int gccbrig_verbose;
size_t
brig_directive_function_handler::operator () (const BrigBase *base)
{
m_parent.finish_function ();
if (!m_parent.m_analyzing)
m_parent.finish_function ();
size_t bytes_consumed = base->byteCount;
@ -64,9 +65,20 @@ brig_directive_function_handler::operator () (const BrigBase *base)
if (is_kernel && !is_definition)
return bytes_consumed;
m_parent.m_cf = new brig_function (exec, &m_parent);
std::string func_name = m_parent.get_mangled_name (exec);
if (is_kernel)
/* The generated kernel function is not the one that should be
called by the host. */
func_name = std::string ("_") + func_name;
m_parent.m_cf = new brig_function (exec, &m_parent);
m_parent.m_cf->m_name = func_name;
m_parent.m_cf->m_is_kernel = is_kernel;
/* During the analyze step, the above information is all we need per
function. */
if (m_parent.m_analyzing)
return bytes_consumed;
tree fndecl;
tree ret_value = NULL_TREE;
@ -79,10 +91,6 @@ brig_directive_function_handler::operator () (const BrigBase *base)
if (is_kernel)
{
/* The generated kernel function is not the one that should be
called by the host. */
func_name = std::string ("_") + func_name;
tree name_identifier
= get_identifier_with_length (func_name.c_str (), func_name.size ());
@ -256,6 +264,23 @@ brig_directive_function_handler::operator () (const BrigBase *base)
DECL_ARTIFICIAL (group_base_arg) = 1;
TREE_READONLY (group_base_arg) = 1;
TREE_USED (group_base_arg) = 1;
m_parent.m_cf->m_group_base_arg = group_base_arg;
/* To implement a call stack and (non-kernel) function scope group variables,
we need to pass an offset which describes how far the function's frame is
from group_base_ptr.
That offset must be added to any function local group variable offset to
get the offset relative to the bottom of the group memory chunk. */
tree group_local_offset_arg
= build_decl (UNKNOWN_LOCATION, PARM_DECL,
get_identifier ("__group_local_offset"), uint32_type_node);
chainon (DECL_ARGUMENTS (fndecl), group_local_offset_arg);
DECL_ARG_TYPE (group_local_offset_arg) = uint32_type_node;
DECL_CONTEXT (group_local_offset_arg) = fndecl;
DECL_ARTIFICIAL (group_local_offset_arg) = 1;
TREE_READONLY (group_local_offset_arg) = 1;
TREE_USED (group_local_offset_arg) = 1;
m_parent.m_cf->m_group_local_offset_arg = group_local_offset_arg;
/* Same for private. */
tree private_base_arg
@ -329,12 +354,9 @@ brig_directive_function_handler::operator () (const BrigBase *base)
m_parent.start_function (fndecl);
m_parent.m_cf->m_name = func_name;
m_parent.m_cf->m_func_decl = fndecl;
m_parent.m_cf->m_current_bind_expr = bind_expr;
m_parent.m_cf->m_is_kernel = is_kernel;
m_parent.m_cf->m_context_arg = context_arg;
m_parent.m_cf->m_group_base_arg = group_base_arg;
m_parent.m_cf->m_private_base_arg = private_base_arg;
if (ret_value != NULL_TREE && TREE_TYPE (ret_value) != void_type_node)

View File

@ -52,11 +52,10 @@ brig_function::brig_function (const BrigDirectiveExecutable *exec,
m_context_arg (NULL_TREE), m_group_base_arg (NULL_TREE),
m_private_base_arg (NULL_TREE), m_ret_value (NULL_TREE),
m_next_kernarg_offset (0), m_kernarg_max_align (0),
m_ret_value_brig_var (NULL), m_has_barriers (false),
m_has_allocas (false), m_has_function_calls_with_barriers (false),
m_calls_analyzed (false), m_is_wg_function (false),
m_has_unexpanded_dp_builtins (false), m_generating_arg_block (false),
m_parent (parent)
m_ret_value_brig_var (NULL), m_has_barriers (false), m_has_allocas (false),
m_has_function_calls_with_barriers (false), m_calls_analyzed (false),
m_is_wg_function (false), m_has_unexpanded_dp_builtins (false),
m_generating_arg_block (false), m_parent (parent)
{
memset (m_regs, 0,
BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT * sizeof (BrigOperandRegister *));
@ -577,20 +576,31 @@ brig_function::emit_launcher_and_metadata ()
tree phsail_launch_kernel_call;
/* Compute the local group segment frame start pointer. */
tree group_local_offset_temp
= create_tmp_var (uint32_type_node, "group_local_offset");
tree group_local_offset_arg
= build2 (MODIFY_EXPR, uint32_type_node,
group_local_offset_temp,
build_int_cst (uint32_type_node,
m_parent->m_module_group_variables.size()));
/* Emit a launcher depending whether we converted the kernel function to
a work group function or not. */
if (m_is_wg_function)
phsail_launch_kernel_call
= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_WG_FUNC),
3, void_type_node,
4, void_type_node,
ptr_type_node, kernel_func_ptr, ptr_type_node,
context_arg, ptr_type_node, group_base_addr_arg);
context_arg, ptr_type_node, group_base_addr_arg,
uint32_type_node, group_local_offset_arg);
else
phsail_launch_kernel_call
= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_KERNEL),
3, void_type_node,
4, void_type_node,
ptr_type_node, kernel_func_ptr, ptr_type_node,
context_arg, ptr_type_node, group_base_addr_arg);
context_arg, ptr_type_node, group_base_addr_arg,
uint32_type_node, group_local_offset_arg);
append_to_statement_list_force (phsail_launch_kernel_call, &stmt_list);
@ -722,3 +732,13 @@ brig_function::has_function_scope_var (const BrigBase* var) const
{
return m_function_scope_vars.find (var) != m_function_scope_vars.end ();
}
/* Returns the group segment offset recorded for the variable NAME.
   The function's own group variable index is consulted first; a name
   that is not found there must be present in the parent's module scope
   index (asserted).  */

size_t
brig_function::group_variable_segment_offset (const std::string &name) const
{
  if (!m_local_group_variables.has_variable (name))
    {
      /* Not a function local variable: fall back to the module scope.  */
      gcc_assert (m_parent->m_module_group_variables.has_variable (name));
      return m_parent->m_module_group_variables.segment_offset (name);
    }

  return m_local_group_variables.segment_offset (name);
}

View File

@ -30,8 +30,7 @@
#include "tree.h"
#include "tree-iterator.h"
#include "hsa-brig-format.h"
class brig_to_generic;
#include "brig-util.h"
#include <map>
#include <string>
@ -40,6 +39,8 @@ class brig_to_generic;
#include "phsa.h"
class brig_to_generic;
typedef std::map<std::string, tree> label_index;
typedef std::map<const BrigDirectiveVariable *, tree> variable_index;
typedef std::vector<tree> tree_stl_vec;
@ -84,6 +85,12 @@ public:
tree add_local_variable (std::string name, tree type);
size_t group_variable_segment_offset (const std::string &name) const;
bool has_group_variable (const std::string &name) const;
size_t group_segment_size () const;
tree get_m_var_declfor_reg (const BrigOperandRegister *reg);
bool convert_to_wg_function ();
@ -119,10 +126,16 @@ public:
/* The __context function argument. */
tree m_context_arg;
/* The __group_base_ptr argument in the current function.
Points to the start of the group segment for the kernel
instance. */
Points to the start of the group segment for the work-group. */
tree m_group_base_arg;
/* The __group_local_offset argument in the current function. It
contains the offset relative to the group_base_ptr where the function's
local area for group variables resides. */
tree m_group_local_offset_arg;
/* The __private_base_ptr argument in the current function.
Points to the start of the private segment. */
tree m_private_base_arg;
@ -159,7 +172,7 @@ public:
/* True if the function has at least one alloca instruction. */
bool m_has_allocas;
/* If the kernel containts at least one function call that _may_
/* If the kernel contains at least one function call that _may_
contain a barrier call, this is set to true. */
bool m_has_function_calls_with_barriers;
@ -199,6 +212,10 @@ public:
/* The functions called by this function. */
std::vector<tree> m_called_functions;
/* Stores the kernel scope group variable offsets if the function is
a kernel. */
group_variable_offset_index m_local_group_variables;
brig_to_generic *m_parent;
/* The metadata of the function that should be stored with the binary and
passed to the HSA runtime: */

View File

@ -60,8 +60,8 @@ tree brig_to_generic::s_fp32_type;
tree brig_to_generic::s_fp64_type;
brig_to_generic::brig_to_generic ()
: m_cf (NULL), m_brig (NULL), m_next_group_offset (0),
m_next_private_offset (0)
: m_cf (NULL), m_analyzing (true), m_total_group_segment_usage (0),
m_brig (NULL), m_next_private_offset (0)
{
m_globals = NULL_TREE;
@ -124,33 +124,32 @@ public:
}
};
/* Parses the given BRIG blob. */
/* Helper struct for pairing a BrigKind with the brig_code_entry_handler
that should handle entries of that kind. Used as the element type of the
dispatch tables that map a code entry's kind to its handler. */
struct code_entry_handler_info
{
/* The BRIG code entry kind this table row matches. */
BrigKind kind;
/* The handler invoked for entries of KIND. The pointer is not owned by
this struct: analyze () fills the table with addresses of handler objects
it declares locally. */
brig_code_entry_handler *handler;
};
/* Finds the BRIG file sections in the currently processed file. */
void
brig_to_generic::parse (const char *brig_blob)
brig_to_generic::find_brig_sections ()
{
m_brig = brig_blob;
m_brig_blobs.push_back (brig_blob);
const BrigModuleHeader *mheader = (const BrigModuleHeader *) brig_blob;
if (strncmp (mheader->identification, "HSA BRIG", 8) != 0)
fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE
"Unrecognized file format.");
if (mheader->brigMajor != 1 || mheader->brigMinor != 0)
fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE
"BRIG version not supported. BRIG 1.0 required.");
m_data = m_code = m_operand = NULL;
const BrigModuleHeader *mheader = (const BrigModuleHeader *) m_brig;
/* Find the positions of the different sections. */
for (uint32_t sec = 0; sec < mheader->sectionCount; ++sec)
{
uint64_t offset
= ((const uint64_t *) (brig_blob + mheader->sectionIndex))[sec];
= ((const uint64_t *) (m_brig + mheader->sectionIndex))[sec];
const BrigSectionHeader *section_header
= (const BrigSectionHeader *) (brig_blob + offset);
= (const BrigSectionHeader *) (m_brig + offset);
std::string name ((const char *) (&section_header->name),
section_header->nameLength);
@ -183,6 +182,94 @@ brig_to_generic::parse (const char *brig_blob)
if (m_operand == NULL)
gcc_unreachable ();
}
/* Does a first pass over the given BRIG to collect data needed for the
actual parsing. Currently this includes only collecting the
group segment variable usage to support the experimental HSA PRM feature
where group variables can be declared also in module and function scope
(in addition to kernel scope).

BRIG_BLOB is the BRIG module to analyze. Its header is checked first; a
wrong identification string or a version other than 1.0 is reported with
fatal_error. The code section is then walked entry by entry; variable,
fbarrier, kernel/function and module directives go to their handlers and
every other entry kind goes to the skipped entry handler. The group
variable sizes found are accumulated to m_total_group_segment_usage.
m_analyzing is true for the duration of the walk and false on return.
*/
void
brig_to_generic::analyze (const char *brig_blob)
{
/* Validate the module header before touching any section. */
const BrigModuleHeader *mheader = (const BrigModuleHeader *) brig_blob;
if (strncmp (mheader->identification, "HSA BRIG", 8) != 0)
fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE
"Unrecognized file format.");
if (mheader->brigMajor != 1 || mheader->brigMinor != 0)
fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE
"BRIG version not supported. BRIG 1.0 required.");
/* find_brig_sections () reads the blob through m_brig, so m_brig must be
set before the call. */
m_brig = brig_blob;
find_brig_sections ();
brig_directive_variable_handler var_handler (*this);
brig_directive_fbarrier_handler fbar_handler (*this);
brig_directive_function_handler func_handler (*this);
/* Need this for grabbing the module names for mangling the
group variable names. */
brig_directive_module_handler module_handler (*this);
/* Default handler for every entry kind that is not in the table below. */
skipped_entry_handler skipped_handler (*this);
const BrigSectionHeader *csection_header = (const BrigSectionHeader *) m_code;
/* Kernels and functions share the same handler. */
code_entry_handler_info handlers[]
= {{BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler},
{BRIG_KIND_DIRECTIVE_FBARRIER, &fbar_handler},
{BRIG_KIND_DIRECTIVE_KERNEL, &func_handler},
{BRIG_KIND_DIRECTIVE_MODULE, &module_handler},
{BRIG_KIND_DIRECTIVE_FUNCTION, &func_handler}};
/* The handlers check this flag to restrict themselves to the bookkeeping
needed by the prepass. */
m_analyzing = true;
/* Each handler returns the number of bytes it consumed, which advances
the cursor to the next entry. */
for (size_t b = csection_header->headerByteCount; b < m_code_size;)
{
const BrigBase *entry = (const BrigBase *) (m_code + b);
brig_code_entry_handler *handler = &skipped_handler;
if (m_cf != NULL && b >= m_cf->m_brig_def->nextModuleEntry)
{
/* The function definition ended. We can just discard the place
holder function. */
m_total_group_segment_usage += m_cf->m_local_group_variables.size ();
delete m_cf;
m_cf = NULL;
}
/* Find a handler. */
/* The scan does not stop at the first match; the kinds listed in the
table are distinct, so at most one row matches. */
for (size_t i = 0;
i < sizeof (handlers) / sizeof (code_entry_handler_info); ++i)
{
if (handlers[i].kind == entry->kind)
handler = handlers[i].handler;
}
b += (*handler) (entry);
}
/* Account for a function whose definition extends to the end of the code
section. */
if (m_cf != NULL)
{
m_total_group_segment_usage += m_cf->m_local_group_variables.size ();
delete m_cf;
m_cf = NULL;
}
/* NOTE(review): m_module_group_variables is a member that is not reset
here, so when analyze () runs for several blobs the module scope size
appears to be added once per blob -- confirm that this (pessimistic)
accumulation is intended. */
m_total_group_segment_usage += m_module_group_variables.size ();
m_analyzing = false;
}
/* Parses the given BRIG blob. */
void
brig_to_generic::parse (const char *brig_blob)
{
m_brig = brig_blob;
find_brig_sections ();
brig_basic_inst_handler inst_handler (*this);
brig_branch_inst_handler branch_inst_handler (*this);
brig_cvt_inst_handler cvt_inst_handler (*this);
@ -269,7 +356,6 @@ brig_to_generic::parse (const char *brig_blob)
handler = handlers[i].handler;
}
b += (*handler) (entry);
continue;
}
finish_function ();
@ -519,6 +605,29 @@ brig_to_generic::get_finished_function (tree func_decl)
return NULL;
}
/* Records the group segment variable NAME of SIZE bytes with the given
   ALIGNMENT in the book keeping structure that matches its scope.

   Module and function scope group region variables are an experimental
   feature.  Module scope variables (FUNCTION_SCOPE false) are kept in a
   separate index inside brig_to_generic which is populated in the
   'analyze()' prepass, so that the group segment offsets are known when
   processing the functions that might refer to them.  Function scope
   variables go to the index of the currently handled function (m_cf).
   A name that is already present in the selected index is left as is.  */

void
brig_to_generic::add_group_variable (const std::string &name, size_t size,
                                     size_t alignment, bool function_scope)
{
  if (function_scope)
    {
      if (!m_cf->m_local_group_variables.has_variable (name))
        m_cf->m_local_group_variables.add (name, size, alignment);
    }
  else if (!m_module_group_variables.has_variable (name))
    m_module_group_variables.add (name, size, alignment);
}
/* Finalizes the currently handled function. Should be called before
setting a new function. */
@ -567,38 +676,6 @@ brig_to_generic::start_function (tree f)
m_cf->m_func_decl = f;
}
/* Appends a new group variable (or an fbarrier) NAME of SIZE bytes to
   the group segment book keeping.  The next free offset is first
   rounded up to a multiple of ALIGNMENT; that offset is recorded for
   NAME and the next free offset then moves past the variable.  */

void
brig_to_generic::append_group_variable (const std::string &name, size_t size,
                                        size_t alignment)
{
  const size_t misalignment = m_next_group_offset % alignment;
  if (misalignment != 0)
    m_next_group_offset += alignment - misalignment;

  m_group_offsets[name] = m_next_group_offset;
  m_next_group_offset += size;
}
size_t
brig_to_generic::group_variable_segment_offset (const std::string &name) const
{
var_offset_table::const_iterator i = m_group_offsets.find (name);
gcc_assert (i != m_group_offsets.end ());
return (*i).second;
}
/* Returns the size of the group segment book kept so far, that is, the
next free group segment offset. */
size_t
brig_to_generic::group_segment_size () const
{
return m_next_group_offset;
}
/* Appends a new variable to the current kernel's private segment. */
void
@ -646,13 +723,6 @@ brig_to_generic::has_private_variable (const std::string &name) const
return i != m_private_data_sizes.end ();
}
/* Returns true if a group segment offset has been recorded for NAME.  */

bool
brig_to_generic::has_group_variable (const std::string &name) const
{
  return m_group_offsets.find (name) != m_group_offsets.end ();
}
size_t
brig_to_generic::private_variable_size (const std::string &name) const
{
@ -662,6 +732,10 @@ brig_to_generic::private_variable_size (const std::string &name) const
return (*i).second;
}
/* The size of private segment required by a single work-item executing
the currently processed kernel. */
size_t
brig_to_generic::private_segment_size () const
{
@ -735,10 +809,11 @@ brig_to_generic::write_globals ()
cgraph_node::finalize_function (f->m_func_decl, true);
f->m_descriptor.is_kernel = 1;
/* TODO: analyze the kernel's actual group and private segment usage
using a call graph. Now the private and group mem sizes are overly
pessimistic in case of multiple kernels in the same module. */
f->m_descriptor.group_segment_size = group_segment_size ();
/* TODO: analyze the kernel's actual private and group segment usage
using call graph. Now the mem size is overly
pessimistic in case of multiple kernels in the same module.
*/
f->m_descriptor.group_segment_size = m_total_group_segment_usage;
f->m_descriptor.private_segment_size = private_segment_size ();
/* The kernarg size is rounded up to a multiple of 16 according to
@ -774,8 +849,6 @@ brig_to_generic::write_globals ()
delete[] vec;
for (size_t i = 0; i < m_brig_blobs.size (); ++i)
delete m_brig_blobs[i];
}
/* Returns an type with unsigned int elements corresponding to the

View File

@ -36,7 +36,6 @@
#include "hsa-brig-format.h"
#include "brig-function.h"
struct reg_decl_index_entry;
/* Converts an HSAIL BRIG input to GENERIC. This class holds global state
@ -56,6 +55,7 @@ private:
public:
brig_to_generic ();
void analyze (const char *brig_blob);
void parse (const char *brig_blob);
void write_globals ();
@ -78,17 +78,9 @@ public:
void start_function (tree f);
void finish_function ();
void append_group_variable (const std::string &name, size_t size,
size_t alignment);
void append_private_variable (const std::string &name, size_t size,
size_t alignment);
size_t group_variable_segment_offset (const std::string &name) const;
bool
has_group_variable (const std::string &name) const;
size_t
private_variable_segment_offset (const std::string &name) const;
@ -107,11 +99,13 @@ public:
{ return get_mangled_name_tmpl (var); }
std::string get_mangled_name (const BrigDirectiveExecutable *func) const;
size_t group_segment_size () const;
size_t private_segment_size () const;
brig_function *get_finished_function (tree func_decl);
void add_group_variable (const std::string &name, size_t size,
size_t alignment, bool function_scope);
static tree s_fp16_type;
static tree s_fp32_type;
static tree s_fp64_type;
@ -123,10 +117,21 @@ public:
/* The currently built function. */
brig_function *m_cf;
/* Stores the module and function scope group variable offsets. */
group_variable_offset_index m_module_group_variables;
/* The name of the currently handled BRIG module. */
std::string m_module_name;
/* Set to true if the compilation is in 'analyze' phase. */
bool m_analyzing;
/* Accumulates the total group segment usage. */
size_t m_total_group_segment_usage;
private:
void find_brig_sections ();
/* The BRIG blob and its different sections of the file currently being
parsed. */
const char *m_brig;
@ -144,10 +149,6 @@ private:
/* The size of each private variable, including the alignment padding. */
std::map<std::string, size_t> m_private_data_sizes;
/* The same for group variables. */
size_t m_next_group_offset;
var_offset_table m_group_offsets;
/* And private. */
size_t m_next_private_offset;
var_offset_table m_private_offsets;
@ -162,9 +163,6 @@ private:
for some interprocedural analysis. */
std::map<std::string, brig_function *> m_finished_functions;
/* The parsed BRIG blobs. Owned and will be deleted after use. */
std::vector<const char *> m_brig_blobs;
/* The original dump file. */
FILE *m_dump_file;

View File

@ -27,6 +27,34 @@ along with GCC; see the file COPYING3. If not see
#include "errors.h"
#include "diagnostic-core.h"
/* Returns true if an offset has been recorded for the variable NAME.  */

bool
group_variable_offset_index::has_variable (const std::string &name) const
{
  return m_group_offsets.find (name) != m_group_offsets.end ();
}
/* Adds a new group segment variable NAME of SIZE bytes.  The next free
   offset is first rounded up to a multiple of ALIGNMENT; that offset is
   recorded for NAME and the next free offset then moves past the
   variable.  Adding a NAME that already exists overwrites its offset.  */

void
group_variable_offset_index::add (const std::string &name, size_t size,
                                  size_t alignment)
{
  const size_t misalignment = m_next_group_offset % alignment;
  if (misalignment != 0)
    m_next_group_offset += alignment - misalignment;

  m_group_offsets[name] = m_next_group_offset;
  m_next_group_offset += size;
}
size_t
group_variable_offset_index::segment_offset (const std::string &name) const
{
varname_offset_table::const_iterator i = m_group_offsets.find (name);
gcc_assert (i != m_group_offsets.end ());
return (*i).second;
}
/* Return true if operand number OPNUM of instruction with OPCODE is an output.
False if it is an input. Some code reused from Martin Jambor's gcc-hsa
tree. */

View File

@ -22,7 +22,33 @@ along with GCC; see the file COPYING3. If not see
#ifndef GCC_BRIG_UTIL_H
#define GCC_BRIG_UTIL_H
#include "brig-to-generic.h"
#include <map>
#include "config.h"
#include "system.h"
#include "ansidecl.h"
#include "coretypes.h"
#include "opts.h"
#include "tree.h"
/* Helper class for keeping book of group variable offsets. Variables are
laid out one after another in the order they are added, each at the next
free offset rounded up to the variable's alignment. */
class group_variable_offset_index
{
public:
/* Creates an empty index; the first variable is placed at offset 0. */
group_variable_offset_index () : m_next_group_offset (0) {}
/* Maps a variable name to its offset. */
typedef std::map<std::string, size_t> varname_offset_table;
/* Returns true if an offset has been recorded for NAME. */
bool has_variable (const std::string &name) const;
/* Records NAME at the next free offset aligned to ALIGNMENT and reserves
SIZE bytes for it. */
void add (const std::string &name, size_t size, size_t alignment);
/* Returns the offset recorded for NAME, which must have been added. */
size_t segment_offset (const std::string &name) const;
/* Returns the next free offset, i.e. the number of bytes covered by the
variables added so far including alignment padding between them. */
size_t size () const { return m_next_group_offset; }
private:
/* The offset at which the next added variable would start before its
alignment is applied. */
size_t m_next_group_offset;
/* The offsets of the variables added so far, by name. */
varname_offset_table m_group_offsets;
};
bool gccbrig_hsa_opcode_op_output_p (BrigOpcode16_t opcode, int opnum);

View File

@ -144,10 +144,25 @@ brig_directive_variable_handler::operator () (const BrigBase *base)
size_t alignment = get_brig_var_alignment (brigVar);
if (m_parent.m_cf != NULL)
bool function_scope = m_parent.m_cf != NULL;
if (function_scope)
m_parent.m_cf->m_function_scope_vars.insert (base);
std::string var_name = m_parent.get_mangled_name (brigVar);
if (brigVar->segment == BRIG_SEGMENT_GROUP)
{
/* Non-kernel scope group variables have been added at the
'analyze' stage. */
m_parent.add_group_variable (var_name, var_size, alignment,
function_scope);
return base->byteCount;
}
/* During analyze, handle only (module scope) group variables. */
if (m_parent.m_analyzing)
return base->byteCount;
if (brigVar->segment == BRIG_SEGMENT_KERNARG)
{
/* Do not create a real variable, but only a table of
@ -158,18 +173,6 @@ brig_directive_variable_handler::operator () (const BrigBase *base)
m_parent.m_cf->append_kernel_arg (brigVar, var_size, alignment);
return base->byteCount;
}
else if (brigVar->segment == BRIG_SEGMENT_GROUP)
{
/* Handle group region variables similarly as kernargs:
assign offsets to the group region on the fly when
a new module scope or function scope group variable is
introduced. These offsets will be then added to the
group_base hidden pointer passed to the kernel in order to
get the flat address. */
if (!m_parent.has_group_variable (var_name))
m_parent.append_group_variable (var_name, var_size, alignment);
return base->byteCount;
}
else if (brigVar->segment == BRIG_SEGMENT_PRIVATE
|| brigVar->segment == BRIG_SEGMENT_SPILL)
{

View File

@ -1,3 +1,11 @@
2017-09-27 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
* brig.dg/test/gimple/fbarrier.hsail: Fixed tests to match the
new group memory offsetting code in the BRIG frontend.
* brig.dg/test/gimple/function_calls.hsail: Likewise.
* brig.dg/test/gimple/smoke_test.hsail: Likewise.
* brig.dg/test/gimple/variables.hsail: Likewise.
2017-09-27 Jakub Jelinek <jakub@redhat.com>
PR c++/82159

View File

@ -65,10 +65,10 @@ prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
/* { dg-final { scan-tree-dump "__hsail_waitfbar \\\(0, __context\\\);" "gimple"} } */
/* { dg-final { scan-tree-dump "__hsail_initfbar \\\(0, __context\\\);" "gimple"} } */
/* { dg-final { scan-tree-dump "__hsail_initfbar \\\(32, __context\\\);" "gimple"} } */
/* { dg-final { scan-tree-dump "__hsail_initfbar \\\(__group_local_offset, __context\\\);" "gimple"} } */
/* { dg-final { scan-tree-dump "__hsail_joinfbar \\\(0, __context\\\);" "gimple"} } */
/* { dg-final { scan-tree-dump "@skip_init:\[\n ]+__builtin___hsail_barrier \\\(__context\\\);\[\n ]+__builtin___hsail_joinfbar \\\(32, __context\\\);" "gimple"} } */
/* { dg-final { scan-tree-dump "@skip_init:\[\n ]+__builtin___hsail_barrier \\\(__context\\\);\[\n ]+__builtin___hsail_joinfbar \\\(__group_local_offset, __context\\\);" "gimple"} } */
/* { dg-final { scan-tree-dump "__hsail_arrivefbar \\\(32, __context\\\);" "gimple"} } */
/* { dg-final { scan-tree-dump "__hsail_arrivefbar \\\(__group_local_offset, __context\\\);" "gimple"} } */
/* { dg-final { scan-tree-dump "__hsail_releasefbar \\\(0, __context\\\);\[\n ]+__builtin___hsail_releasefbar \\\(32, __context\\\);" "gimple"} } */
/* { dg-final { scan-tree-dump "__hsail_releasefbar \\\(0, __context\\\);\[\n ]+__builtin___hsail_releasefbar \\\(__group_local_offset, __context\\\);" "gimple"} } */

View File

@ -46,7 +46,7 @@ prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
/* The generated function call should have the incoming arguments and four hidden arguments. */
/* { dg-final { scan-tree-dump "_\[0-9\]+ = subfunction \\\(_kernel.float_arg.\[_0-9\]+, _kernel.double_arg.\[_0-9\]+, _kernel.half_arg.\[_0-9\]+, __context, __group_base_addr, __private_base_addr\\\);" "gimple"} } */
/* { dg-final { scan-tree-dump "_\[0-9\]+ = subfunction \\\(_kernel.float_arg.\[_0-9\]+, _kernel.double_arg.\[_0-9\]+, _kernel.half_arg.\[_0-9\]+, __context, __group_base_addr, group_local_offset.*, __private_base_addr\\\);" "gimple"} } */
/* The callee should refer directly to the scalar arguments when it reads them. */
/* { dg-final { scan-tree-dump "= float_arg;" "gimple"} } */

View File

@ -42,7 +42,7 @@ prog kernel &KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
/* The kernel function itself should have a fingerprint as follows */
/* _Kernel (unsigned char * __args, void * __context, void * __group_base_addr, void * __private_base_addr) */
/* { dg-final { scan-tree-dump "_Kernel \\\(unsigned char \\\* __args, void \\\* __context, void \\\* __group_base_addr, void \\\* __private_base_addr\\\)" "gimple"} } */
/* { dg-final { scan-tree-dump "_Kernel \\\(unsigned char \\\* __args, void \\\* __context, void \\\* __group_base_addr, unsigned int __group_local_offset, void \\\* __private_base_addr\\\)" "gimple"} } */
/* ld_kernarg: mem_read.0 = MEM[(unsigned long *)__args]; */
/* { dg-final { scan-tree-dump "mem_read.\[0-9\] = MEM\\\[\\\(unsigned long \\\*\\\)__args\\\];" "gimple"} } */
@ -73,7 +73,7 @@ prog kernel &KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
/* The launcher should call __hsail_launch_wg_function in this case: */
/* Kernel (void * __context, void * __group_base_addr) */
/* { dg-final { scan-tree-dump "Kernel \\\(void \\\* __context, void \\\* __group_base_addr\\\)" "gimple"} } */
/* { dg-final { scan-tree-dump "__hsail_launch_wg_function \\\(_Kernel, __context, __group_base_addr\\\);" "gimple"} }*/
/* { dg-final { scan-tree-dump "__hsail_launch_wg_function \\\(_Kernel, __context, __group_base_addr, group_local_offset.*\\\);" "gimple"} }*/
/* The kernel should have the magic metadata section injected to the ELF. */
/* TODO: this should be disabled in case not outputting to an ELF. */
@ -85,7 +85,7 @@ prog kernel &KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
/* { dg-final { scan-tree-dump "s2 = s0 \\\+ s1;\[\n \]+__builtin___hsail_barrier \\\(__context\\\);\[\n \]+s3 = s0 \\\+ 4294967295;" "gimple"} } */
/* The kernel with the barrier call's launcher function should call the thread-spawning function. */
/* { dg-final { scan-tree-dump "__hsail_launch_kernel \\\(_KernelWithBarrier, __context, __group_base_addr\\\);" "gimple" } } */
/* { dg-final { scan-tree-dump "__hsail_launch_kernel \\\(_KernelWithBarrier, __context, __group_base_addr, group_local_offset.*\\\);" "gimple" } } */

View File

@ -3,7 +3,7 @@ module &module:1:0:$full:$large:$default;
/* Tests for different variable scopes and address spaces. */
/* { dg-do compile } */
/* { dg-options "-fdump-tree-gimple" } */
/* { dg-options "-fdump-tree-gimple -fdump-tree-original" } */
prog align(256) private_u32 &prog_private;
private_u32 &mod_private;
@ -29,7 +29,10 @@ prog function &subfunction(arg_u32 %return_value)(arg_u32 %arg) {
ld_private_u32 $s200, [%func_private];
st_private_u32 $s200, [&prog_private];
/* { dg-final { scan-tree-dump "__group_base_addr \\\+ \\\(0 \\\+" "original" } } */
ld_group_u32 $s203, [%func_group];
/* { dg-final { scan-tree-dump "__group_base_addr \\\+ 0" "original" } } */
st_group_u32 $s203, [&prog_group];
ld_global_u32 $s204, [%func_global];
@ -104,8 +107,6 @@ prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
kern_group @12 (3)
*/
/* { dg-final { scan-tree-dump "\\\+ 8;.*\\\+ 12;.*\\\+ 4;" "gimple" } } */
/* The "mangling" of the global and readonly vars. */
/* { dg-final { scan-tree-dump "\[ \]*prog_global = s204;" "gimple" } } */

View File

@ -1,7 +1,15 @@
2017-09-27 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
* include/internal/phsa-rt.h: Support for improved group segment
handling with a stack-like allocation scheme.
* include/internal/workitems.h: Likewise.
* rt/workitems.c: Likewise.
2017-09-25 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
* rt/workitems.c: Assume the host runtime allocates the work group
memory.
2017-05-03 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
* rt/workitems.c: Removed a leftover comment.

View File

@ -42,7 +42,8 @@ typedef void (*gccbrigKernelLauncherFunc) (void *context, void *);
/* Pointer type for kernel functions produced by gccbrig from the HSAIL.
This is private from outside the device binary and only called by
the launcher. */
typedef void (*gccbrigKernelFunc) (unsigned char *, void *, void *, void *);
typedef void (*gccbrigKernelFunc) (unsigned char *, void *, void *, uint32_t,
void *);
/* Context data that is passed to the kernel function, initialized
by the runtime to the current launch information. The data is

View File

@ -63,6 +63,11 @@ typedef struct
to the work-group. */
void *group_base_ptr;
/* The offset in the group memory for the kernel local group variables.
To support module scope group variables, there might be a need to reserve
room for them at the beginning of the group segment. */
uint32_t initial_group_offset;
/* Similarly to the private segment that gets space allocated for all
WIs in the work-group. */
void *private_base_ptr;

View File

@ -113,7 +113,7 @@ phsa_work_item_thread (int arg0, int arg1)
&& wi->z < __hsail_currentworkgroupsize (2, wi))
{
l_data->kernel (l_data->kernarg_addr, wi, wg->group_base_ptr,
wg->private_base_ptr);
wg->initial_group_offset, wg->private_base_ptr);
#ifdef DEBUG_PHSA_RT
printf ("done.\n");
#endif
@ -221,7 +221,8 @@ phsa_work_item_thread (int arg0, int arg1)
static void
phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr,
size_t wg_size_x, size_t wg_size_y, size_t wg_size_z)
uint32_t group_local_offset, size_t wg_size_x,
size_t wg_size_y, size_t wg_size_z)
{
PHSAWorkItem *wi_threads = NULL;
PHSAWorkGroup wg;
@ -247,6 +248,7 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr,
wg.alloca_stack_p = wg.private_segment_total_size;
wg.alloca_frame_p = wg.alloca_stack_p;
wg.initial_group_offset = group_local_offset;
#ifdef EXECUTE_WGS_BACKWARDS
wg.x = context->wg_max_x - 1;
@ -313,7 +315,8 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr,
them execute all the WGs, including a potential partial WG. */
static void
phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr)
phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr,
uint32_t group_local_offset)
{
hsa_kernel_dispatch_packet_t *dp = context->dp;
size_t x, y, z;
@ -361,8 +364,8 @@ phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr)
dp->grid_size_y, dp->grid_size_z);
#endif
phsa_execute_wi_gang (context, group_base_ptr, sat_wg_size_x, sat_wg_size_y,
sat_wg_size_z);
phsa_execute_wi_gang (context, group_base_ptr, group_local_offset,
sat_wg_size_x, sat_wg_size_y, sat_wg_size_z);
}
#endif
@ -374,7 +377,8 @@ phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr)
execute massive numbers of work-items in a non-SPMD machine than fibers
(easily 100x faster). */
static void
phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr)
phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr,
uint32_t group_local_offset)
{
hsa_kernel_dispatch_packet_t *dp = context->dp;
size_t x, y, z, wg_x, wg_y, wg_z;
@ -462,7 +466,7 @@ phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr)
wi.wg->z = wg_z;
context->kernel (context->kernarg_addr, &wi, group_base_ptr,
private_base_ptr);
group_local_offset, private_base_ptr);
#if defined (BENCHMARK_PHSA_RT)
wg_count++;
@ -527,19 +531,20 @@ phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr)
void
__hsail_launch_kernel (gccbrigKernelFunc kernel, PHSAKernelLaunchData *context,
void *group_base_ptr)
void *group_base_ptr, uint32_t group_local_offset)
{
context->kernel = kernel;
phsa_spawn_work_items (context, group_base_ptr);
phsa_spawn_work_items (context, group_base_ptr, group_local_offset);
}
#endif
void
__hsail_launch_wg_function (gccbrigKernelFunc kernel,
PHSAKernelLaunchData *context, void *group_base_ptr)
PHSAKernelLaunchData *context, void *group_base_ptr,
uint32_t group_local_offset)
{
context->kernel = kernel;
phsa_execute_work_groups (context, group_base_ptr);
phsa_execute_work_groups (context, group_base_ptr, group_local_offset);
}
uint32_t