openacc: Fix mkoffload SGPR/VGPR count parsing for HSACO v3
If an offload kernel uses a large number of VGPRs, AMD GCN hardware may need to limit the number of threads/workers launched for that kernel. The number of SGPRs/VGPRs in use is detected by mkoffload and recorded in the processed output. The patterns emitted detailing SGPR/VGPR occupancy changed between HSACO v2 and v3 though, so this patch updates parsing to account for that.

2020-09-08  Julian Brown  <julian@codesourcery.com>

gcc/
* config/gcn/mkoffload.c (process_asm): Initialise regcount. Update scanning for SGPR/VGPR usage for HSACO v3.
This commit is contained in:
parent
d6d9be7c6b
commit
3aee3aaf48
@ -432,7 +432,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
|
||||
int sgpr_count;
|
||||
int vgpr_count;
|
||||
char *kernel_name;
|
||||
} regcount;
|
||||
} regcount = { -1, -1, NULL };
|
||||
|
||||
/* Always add _init_array and _fini_array as kernels. */
|
||||
obstack_ptr_grow (&fns_os, xstrdup ("_init_array"));
|
||||
@ -440,7 +440,12 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
|
||||
fn_count += 2;
|
||||
|
||||
char buf[1000];
|
||||
enum { IN_CODE, IN_AMD_KERNEL_CODE_T, IN_VARS, IN_FUNCS } state = IN_CODE;
|
||||
enum
|
||||
{ IN_CODE,
|
||||
IN_METADATA,
|
||||
IN_VARS,
|
||||
IN_FUNCS
|
||||
} state = IN_CODE;
|
||||
while (fgets (buf, sizeof (buf), in))
|
||||
{
|
||||
switch (state)
|
||||
@ -453,21 +458,25 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
|
||||
obstack_grow (&dims_os, &dim, sizeof (dim));
|
||||
dims_count++;
|
||||
}
|
||||
else if (sscanf (buf, " .amdgpu_hsa_kernel %ms\n",
|
||||
&regcount.kernel_name) == 1)
|
||||
break;
|
||||
|
||||
break;
|
||||
}
|
||||
case IN_AMD_KERNEL_CODE_T:
|
||||
case IN_METADATA:
|
||||
{
|
||||
gcc_assert (regcount.kernel_name);
|
||||
if (sscanf (buf, " wavefront_sgpr_count = %d\n",
|
||||
&regcount.sgpr_count) == 1)
|
||||
if (sscanf (buf, " - .name: %ms\n", &regcount.kernel_name) == 1)
|
||||
break;
|
||||
else if (sscanf (buf, " workitem_vgpr_count = %d\n",
|
||||
else if (sscanf (buf, " .sgpr_count: %d\n",
|
||||
&regcount.sgpr_count) == 1)
|
||||
{
|
||||
gcc_assert (regcount.kernel_name);
|
||||
break;
|
||||
}
|
||||
else if (sscanf (buf, " .vgpr_count: %d\n",
|
||||
&regcount.vgpr_count) == 1)
|
||||
break;
|
||||
{
|
||||
gcc_assert (regcount.kernel_name);
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
@ -508,9 +517,10 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
|
||||
state = IN_VARS;
|
||||
else if (sscanf (buf, " .section .gnu.offload_funcs%c", &dummy) > 0)
|
||||
state = IN_FUNCS;
|
||||
else if (sscanf (buf, " .amd_kernel_code_%c", &dummy) > 0)
|
||||
else if (sscanf (buf, " .amdgpu_metadata%c", &dummy) > 0)
|
||||
{
|
||||
state = IN_AMD_KERNEL_CODE_T;
|
||||
state = IN_METADATA;
|
||||
regcount.kernel_name = NULL;
|
||||
regcount.sgpr_count = regcount.vgpr_count = -1;
|
||||
}
|
||||
else if (sscanf (buf, " .section %c", &dummy) > 0
|
||||
@ -519,7 +529,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
|
||||
|| sscanf (buf, " .data%c", &dummy) > 0
|
||||
|| sscanf (buf, " .ident %c", &dummy) > 0)
|
||||
state = IN_CODE;
|
||||
else if (sscanf (buf, " .end_amd_kernel_code_%c", &dummy) > 0)
|
||||
else if (sscanf (buf, " .end_amdgpu_metadata%c", &dummy) > 0)
|
||||
{
|
||||
state = IN_CODE;
|
||||
gcc_assert (regcount.kernel_name != NULL
|
||||
@ -531,7 +541,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
|
||||
regcount.sgpr_count = regcount.vgpr_count = -1;
|
||||
}
|
||||
|
||||
if (state == IN_CODE || state == IN_AMD_KERNEL_CODE_T)
|
||||
if (state == IN_CODE || state == IN_METADATA)
|
||||
fputs (buf, out);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user