amdgcn: Add -mxnack and -msram-ecc [PR 100208]

gcc/ChangeLog:

	PR target/100208
	* config/gcn/gcn-hsa.h (DRIVER_SELF_SPECS): New.
	(ASM_SPEC): Set -mattr for xnack and sram-ecc.
	* config/gcn/gcn-opts.h (enum sram_ecc_type): New.
	* config/gcn/gcn-valu.md: Add a warning comment.
	* config/gcn/gcn.c (gcn_option_override): Add "sorry" for -mxnack.
	(output_file_start): Add xnack and sram-ecc state to ".amdgcn_target".
	* config/gcn/gcn.md: Add a warning comment.
	* config/gcn/gcn.opt: Add -mxnack and -msram-ecc.
	* config/gcn/mkoffload.c (EF_AMDGPU_MACH_AMDGCN_GFX908): Remove
	SRAM-ECC flag.
	(EF_AMDGPU_XNACK): New.
	(EF_AMDGPU_SRAM_ECC): New.
	(elf_flags): New.
	(copy_early_debug_info): Use elf_flags.
	(main): Handle -mxnack and -msram-ecc options.
	* doc/invoke.texi: Document -mxnack and -msram-ecc.

gcc/testsuite/ChangeLog:

	PR target/100208
	* gcc.target/gcn/sram-ecc-1.c: New test.
	* gcc.target/gcn/sram-ecc-2.c: New test.
	* gcc.target/gcn/sram-ecc-3.c: New test.
	* gcc.target/gcn/sram-ecc-4.c: New test.
	* gcc.target/gcn/sram-ecc-5.c: New test.
	* gcc.target/gcn/sram-ecc-6.c: New test.
	* gcc.target/gcn/sram-ecc-7.c: New test.
	* gcc.target/gcn/sram-ecc-8.c: New test.

(cherry picked from commit aad32a00b7)
This commit is contained in:
Andrew Stubbs 2021-07-08 15:47:53 +01:00 committed by Tobias Burnus
parent e85d6ef1c4
commit 9552afb2ae
16 changed files with 273 additions and 4 deletions

View File

@ -75,9 +75,15 @@ extern unsigned int gcn_local_sym_hash (const char *name);
supported for gcn. */
#define GOMP_SELF_SPECS ""
/* When -march= selects fiji, gfx900 or gfx906 and no -msram-ecc= option
   was given on the command line, add -msram-ecc=off.  */
#define DRIVER_SELF_SPECS \
"%{march=fiji|march=gfx900|march=gfx906:%{!msram-ecc=*:-msram-ecc=off}}"
/* Use LLVM assembler and linker options.
   The assembler command line is built as follows:
   - each -march=X becomes -mcpu=X, filtered through the last_arg spec
     function;
   - -mattr=+xnack when -mxnack is given, -mattr=-xnack otherwise;
   - -mattr=-sram-ecc when -msram-ecc=off is given, -mattr=+sram-ecc in
     every other case (so "on" and "any" are treated alike here).  */
#define ASM_SPEC "-triple=amdgcn--amdhsa " \
"%:last_arg(%{march=*:-mcpu=%*}) " \
"-mattr=%{mxnack:+xnack;:-xnack} " \
/* FIXME: support "any" when we move to HSACOv4. */ \
"-mattr=%{!msram-ecc=off:+sram-ecc;:-sram-ecc} " \
"-filetype=obj"
#define LINK_SPEC "--pie --export-dynamic"
#define LIB_SPEC "-lc"

View File

@ -34,4 +34,11 @@ extern int gcn_isa;
#define TARGET_GCN5 (gcn_isa == 5)
#define TARGET_GCN5_PLUS (gcn_isa >= 5)
/* Modes selectable with -msram-ecc=.  The option file maps the strings
   "off", "on" and "any" onto these enumerators.  SRAM_ECC_OFF is zero, so
   code that tests the option variable for truth treats both "on" and
   "any" as enabled.  */
enum sram_ecc_type
{
  SRAM_ECC_OFF = 0,	/* -msram-ecc=off  */
  SRAM_ECC_ON = 1,	/* -msram-ecc=on  */
  SRAM_ECC_ANY = 2	/* -msram-ecc=any  */
};
#endif

View File

@ -703,6 +703,8 @@
;; - The address space and glc (volatile) fields are there to replace the
;; fields normally found in a MEM.
;; - Multiple forms of address expression are supported, below.
;;
;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
(define_expand "gather_load<mode><vnsi>"
[(match_operand:V_ALL 0 "register_operand")

View File

@ -143,6 +143,10 @@ gcn_option_override (void)
/* 1MB total. */
stack_size_opt = 1048576;
}
/* The xnack option is a placeholder, for now. */
if (flag_xnack)
sorry ("XNACK support");
}
/* }}} */
@ -5035,11 +5039,16 @@ output_file_start (void)
case PROCESSOR_FIJI: cpu = "gfx803"; break;
case PROCESSOR_VEGA10: cpu = "gfx900"; break;
case PROCESSOR_VEGA20: cpu = "gfx906"; break;
case PROCESSOR_GFX908: cpu = "gfx908+sram-ecc"; break;
case PROCESSOR_GFX908: cpu = "gfx908"; break;
default: gcc_unreachable ();
}
fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s\"\n", cpu);
const char *xnack = (flag_xnack ? "+xnack" : "");
/* FIXME: support "any" when we move to HSACOv4. */
const char *sram_ecc = (flag_sram_ecc ? "+sram-ecc" : "");
fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s%s%s\"\n",
cpu, xnack, sram_ecc);
}
/* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h.

View File

@ -567,6 +567,7 @@
(set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])
; 8/16bit move pattern
; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on
(define_insn "*mov<mode>_insn"
[(set (match_operand:QIHI 0 "nonimmediate_operand"

View File

@ -76,3 +76,24 @@ Target RejectNegative Joined UInteger Var(stack_size_opt) Init(-1)
Wopenacc-dims
Target Var(warn_openacc_dims) Warning
Warn about invalid OpenACC dimensions.
mxnack
Target Var(flag_xnack) Init(0)
Compile for devices requiring XNACK enabled. Default off.
Enum
Name(sram_ecc_type) Type(enum sram_ecc_type)
SRAM-ECC modes:
EnumValue
Enum(sram_ecc_type) String(off) Value(SRAM_ECC_OFF)
EnumValue
Enum(sram_ecc_type) String(on) Value(SRAM_ECC_ON)
EnumValue
Enum(sram_ecc_type) String(any) Value(SRAM_ECC_ANY)
msram-ecc=
Target RejectNegative Joined ToLower Enum(sram_ecc_type) Var(flag_sram_ecc) Init(SRAM_ECC_ANY)
Compile for devices with the SRAM ECC feature enabled, or not. Default \"any\".

View File

@ -52,7 +52,10 @@
#undef EF_AMDGPU_MACH_AMDGCN_GFX906
#define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
#undef EF_AMDGPU_MACH_AMDGCN_GFX908
#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x230 // Assume SRAM-ECC enabled.
#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30
#define EF_AMDGPU_XNACK 0x100
#define EF_AMDGPU_SRAM_ECC 0x200
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE 0
@ -77,6 +80,7 @@ static struct obstack files_to_cleanup;
enum offload_abi offload_abi = OFFLOAD_ABI_UNSET;
uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; // Default GPU architecture.
uint32_t elf_flags = 0;
/* Delete tempfiles. */
@ -298,7 +302,7 @@ copy_early_debug_info (const char *infile, const char *outfile)
ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA;
ehdr.e_type = ET_REL;
ehdr.e_machine = EM_AMDGPU;
ehdr.e_flags = elf_arch;
ehdr.e_flags = elf_arch | elf_flags;
/* Load the section headers so we can walk them later. */
Elf64_Shdr *sections = (Elf64_Shdr *)xmalloc (sizeof (Elf64_Shdr)
@ -823,6 +827,7 @@ main (int argc, char **argv)
bool fopenacc = false;
bool fPIC = false;
bool fpic = false;
bool sram_seen = false;
for (int i = 1; i < argc; i++)
{
#define STR "-foffload-abi="
@ -845,6 +850,26 @@ main (int argc, char **argv)
fPIC = true;
else if (strcmp (argv[i], "-fpic") == 0)
fpic = true;
else if (strcmp (argv[i], "-mxnack") == 0)
elf_flags |= EF_AMDGPU_XNACK;
else if (strcmp (argv[i], "-mno-xnack") == 0)
elf_flags &= ~EF_AMDGPU_XNACK;
else if (strcmp (argv[i], "-msram-ecc=on") == 0)
{
elf_flags |= EF_AMDGPU_SRAM_ECC;
sram_seen = true;
}
else if (strcmp (argv[i], "-msram-ecc=any") == 0)
{
/* FIXME: change this when we move to HSACOv4. */
elf_flags |= EF_AMDGPU_SRAM_ECC;
sram_seen = true;
}
else if (strcmp (argv[i], "-msram-ecc=off") == 0)
{
elf_flags &= ~EF_AMDGPU_SRAM_ECC;
sram_seen = true;
}
else if (strcmp (argv[i], "-save-temps") == 0)
save_temps = true;
else if (strcmp (argv[i], "-v") == 0)
@ -865,6 +890,21 @@ main (int argc, char **argv)
if (!(fopenacc ^ fopenmp))
fatal_error (input_location, "either -fopenacc or -fopenmp must be set");
/* The SRAM-ECC feature defaults to "any" on GPUs where the feature is
available. */
if (!sram_seen)
switch (elf_arch)
{
case EF_AMDGPU_MACH_AMDGCN_GFX803:
case EF_AMDGPU_MACH_AMDGCN_GFX900:
case EF_AMDGPU_MACH_AMDGCN_GFX906:
break;
default:
/* FIXME: change this when we move to HSACOv4. */
elf_flags |= EF_AMDGPU_SRAM_ECC;
break;
}
const char *abi;
switch (offload_abi)
{
@ -892,6 +932,12 @@ main (int argc, char **argv)
obstack_ptr_grow (&cc_argv_obstack, "-xlto");
if (fopenmp)
obstack_ptr_grow (&cc_argv_obstack, "-mgomp");
obstack_ptr_grow (&cc_argv_obstack,
(elf_flags & EF_AMDGPU_XNACK
? "-mxnack" : "-mno-xnack"));
obstack_ptr_grow (&cc_argv_obstack,
(elf_flags & EF_AMDGPU_SRAM_ECC
? "-msram-ecc=on" : "-msram-ecc=off"));
for (int ix = 1; ix != argc; ix++)
{
@ -993,6 +1039,14 @@ main (int argc, char **argv)
}
obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name);
obstack_ptr_grow (&ld_argv_obstack, "-lgomp");
obstack_ptr_grow (&ld_argv_obstack,
(elf_flags & EF_AMDGPU_XNACK
? "-mxnack" : "-mno-xnack"));
obstack_ptr_grow (&ld_argv_obstack,
(elf_flags & EF_AMDGPU_SRAM_ECC
? "-msram-ecc=on" : "-msram-ecc=off"));
if (verbose)
obstack_ptr_grow (&ld_argv_obstack, "-v");
for (int i = 1; i < argc; i++)
if (strncmp (argv[i], "-l", 2) == 0

View File

@ -18686,6 +18686,15 @@ Compile for GCN5 Vega 20 devices (gfx906).
@end table
@item -msram-ecc=on
@itemx -msram-ecc=off
@itemx -msram-ecc=any
@opindex msram-ecc
Compile binaries suitable for devices with the SRAM-ECC feature enabled,
disabled, or either mode. This feature can be enabled per-process on some
devices. The compiled code must match the device mode. The default is
@samp{any}, for devices that support it.
@item -mstack-size=@var{bytes}
@opindex mstack-size
Specify how many @var{bytes} of stack space will be requested for each GPU
@ -18694,6 +18703,14 @@ available. The size of the stack allocation may also have an impact on
run-time performance. The default is 32KB when using OpenACC or OpenMP, and
1MB otherwise.
@item -mxnack
@opindex mxnack
Compile binaries suitable for devices with the XNACK feature enabled. Some
devices always require XNACK and some allow the user to configure XNACK. The
compiled code must match the device mode. The default is @samp{-mno-xnack}.
At present this option is a placeholder for support that is not yet
implemented.
@end table
@node ARC Options

View File

@ -0,0 +1,17 @@
/* Ensure that explicit zero-extend instructions are present when compiling
for targets without sram-ecc enabled (in which sub-dword loads do not
zero the high bits of the target register). */
/* { dg-do compile } */
/* { dg-options "-O2 -msram-ecc=off" } */
extern unsigned char c;
/* Return the global unsigned char C as an unsigned int.  This widening is
   the zero-extension that the 24-bit shift pair scanned for below is
   expected to implement.  */
unsigned int
f ()
{
return c;
}
/* { dg-final { scan-assembler "lshl.* 24" } } */
/* { dg-final { scan-assembler "lshr.* 24" } } */

View File

@ -0,0 +1,17 @@
/* Ensure that explicit zero-extend instructions are present when compiling
for targets without sram-ecc enabled (in which sub-dword loads do not
zero the high bits of the target register). */
/* { dg-do compile } */
/* { dg-options "-O2 -msram-ecc=off" } */
extern unsigned short s;
/* Return the global unsigned short S.
   NOTE(review): the return type is unsigned short, so no widening is
   written in the C source; the 16-bit shift pair expected below presumably
   comes from the target extending the value into a full register --
   confirm.  */
unsigned short
f ()
{
return s;
}
/* { dg-final { scan-assembler "lshl.* 16" } } */
/* { dg-final { scan-assembler "lshr.* 16" } } */

View File

@ -0,0 +1,21 @@
/* Ensure that explicit zero-extend instructions are present when compiling
for targets without sram-ecc enabled (in which sub-dword loads do not
zero the high bits of the target register). */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=off" } */
/* Vector types sized in bytes: 64*4 bytes of unsigned int and 64*1 bytes of
   unsigned char (64 lanes each, assuming a 4-byte int -- TODO confirm for
   the target).  */
typedef unsigned int v64si __attribute__ ((vector_size (64*4)));
typedef unsigned char v64qi __attribute__ ((vector_size (64*1)));
extern v64si a;
extern v64qi b;
/* Copy the first 64 elements of B into A one by one, widening each
   unsigned char element to unsigned int.  The dg-final directive expects
   the resulting code to mention the zero_extendv64qiv64si2 pattern.  */
void
f ()
{
for (int n = 0; n < 64; n++)
a[n] = b[n];
}
/* { dg-final { scan-assembler "zero_extendv64qiv64si2" } } */

View File

@ -0,0 +1,21 @@
/* Ensure that explicit zero-extend instructions are present when compiling
for targets without sram-ecc enabled (in which sub-dword loads do not
zero the high bits of the target register). */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=off" } */
/* Vector types sized in bytes: 64*4 bytes of unsigned int and 64*2 bytes of
   unsigned short (64 lanes each, assuming 4-byte int and 2-byte short --
   TODO confirm for the target).  */
typedef unsigned int v64si __attribute__ ((vector_size (64*4)));
typedef unsigned short v64hi __attribute__ ((vector_size (64*2)));
extern v64si a;
extern v64hi b;
/* Copy the first 64 elements of B into A one by one, widening each
   unsigned short element to unsigned int.  The dg-final directive expects
   the resulting code to mention the zero_extendv64hiv64si2 pattern.  */
void
f ()
{
for (int n = 0; n < 64; n++)
a[n] = b[n];
}
/* { dg-final { scan-assembler "zero_extendv64hiv64si2" } } */

View File

@ -0,0 +1,17 @@
/* Ensure that explicit zero-extend instructions are present when compiling
for targets that may not have sram-ecc enabled (in which sub-dword loads do
not zero the high bits of the target register). */
/* { dg-do compile } */
/* { dg-options "-O2 -msram-ecc=any" } */
extern unsigned char c;
/* Return the global unsigned char C as an unsigned int.  This widening is
   the zero-extension that the 24-bit shift pair scanned for below is
   expected to implement, here under -msram-ecc=any.  */
unsigned int
f ()
{
return c;
}
/* { dg-final { scan-assembler "lshl.* 24" } } */
/* { dg-final { scan-assembler "lshr.* 24" } } */

View File

@ -0,0 +1,17 @@
/* Ensure that explicit zero-extend instructions are present when compiling
for targets that may not have sram-ecc enabled (in which sub-dword loads do
not zero the high bits of the target register). */
/* { dg-do compile } */
/* { dg-options "-O2 -msram-ecc=any" } */
extern unsigned short s;
/* Return the global unsigned short S, here under -msram-ecc=any.
   NOTE(review): the return type is unsigned short, so no widening is
   written in the C source; the 16-bit shift pair expected below presumably
   comes from the target extending the value into a full register --
   confirm.  */
unsigned short
f ()
{
return s;
}
/* { dg-final { scan-assembler "lshl.* 16" } } */
/* { dg-final { scan-assembler "lshr.* 16" } } */

View File

@ -0,0 +1,21 @@
/* Ensure that explicit zero-extend instructions are present when compiling
for targets that may not have sram-ecc enabled (in which sub-dword loads do
not zero the high bits of the target register). */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=any" } */
/* Vector types sized in bytes: 64*4 bytes of unsigned int and 64*1 bytes of
   unsigned char (64 lanes each, assuming a 4-byte int -- TODO confirm for
   the target).  */
typedef unsigned int v64si __attribute__ ((vector_size (64*4)));
typedef unsigned char v64qi __attribute__ ((vector_size (64*1)));
extern v64si a;
extern v64qi b;
/* Copy the first 64 elements of B into A one by one, widening each
   unsigned char element to unsigned int.  The dg-final directive expects
   the resulting code to mention the zero_extendv64qiv64si2 pattern, here
   under -msram-ecc=any.  */
void
f ()
{
for (int n = 0; n < 64; n++)
a[n] = b[n];
}
/* { dg-final { scan-assembler "zero_extendv64qiv64si2" } } */

View File

@ -0,0 +1,21 @@
/* Ensure that explicit zero-extend instructions are present when compiling
for targets that may not have sram-ecc enabled (in which sub-dword loads do
not zero the high bits of the target register). */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=any" } */
/* Vector types sized in bytes: 64*4 bytes of unsigned int and 64*2 bytes of
   unsigned short (64 lanes each, assuming 4-byte int and 2-byte short --
   TODO confirm for the target).  */
typedef unsigned int v64si __attribute__ ((vector_size (64*4)));
typedef unsigned short v64hi __attribute__ ((vector_size (64*2)));
extern v64si a;
extern v64hi b;
/* Copy the first 64 elements of B into A one by one, widening each
   unsigned short element to unsigned int.  The dg-final directive expects
   the resulting code to mention the zero_extendv64hiv64si2 pattern, here
   under -msram-ecc=any.  */
void
f ()
{
for (int n = 0; n < 64; n++)
a[n] = b[n];
}
/* { dg-final { scan-assembler "zero_extendv64hiv64si2" } } */