amdgcn: Add -mxnack and -msram-ecc [PR 100208]
gcc/ChangeLog:
PR target/100208
* config/gcn/gcn-hsa.h (DRIVER_SELF_SPECS): New.
(ASM_SPEC): Set -mattr for xnack and sram-ecc.
* config/gcn/gcn-opts.h (enum sram_ecc_type): New.
* config/gcn/gcn-valu.md: Add a warning comment.
* config/gcn/gcn.c (gcn_option_override): Add "sorry" for -mxnack.
(output_file_start): Add xnack and sram-ecc state to ".amdgcn_target".
* config/gcn/gcn.md: Add a warning comment.
* config/gcn/gcn.opt: Add -mxnack and -msram-ecc.
* config/gcn/mkoffload.c (EF_AMDGPU_MACH_AMDGCN_GFX908): Remove
SRAM-ECC flag.
(EF_AMDGPU_XNACK): New.
(EF_AMDGPU_SRAM_ECC): New.
(elf_flags): New.
(copy_early_debug_info): Use elf_flags.
(main): Handle -mxnack and -msram-ecc options.
* doc/invoke.texi: Document -mxnack and -msram-ecc.
gcc/testsuite/ChangeLog:
PR target/100208
* gcc.target/gcn/sram-ecc-1.c: New test.
* gcc.target/gcn/sram-ecc-2.c: New test.
* gcc.target/gcn/sram-ecc-3.c: New test.
* gcc.target/gcn/sram-ecc-4.c: New test.
* gcc.target/gcn/sram-ecc-5.c: New test.
* gcc.target/gcn/sram-ecc-6.c: New test.
* gcc.target/gcn/sram-ecc-7.c: New test.
* gcc.target/gcn/sram-ecc-8.c: New test.
(cherry picked from commit aad32a00b7)
This commit is contained in:
parent
e85d6ef1c4
commit
9552afb2ae
@ -75,9 +75,15 @@ extern unsigned int gcn_local_sym_hash (const char *name);
|
||||
supported for gcn. */
|
||||
#define GOMP_SELF_SPECS ""
|
||||
|
||||
#define DRIVER_SELF_SPECS \
|
||||
"%{march=fiji|march=gfx900|march=gfx906:%{!msram-ecc=*:-msram-ecc=off}}"
|
||||
|
||||
/* Use LLVM assembler and linker options. */
|
||||
#define ASM_SPEC "-triple=amdgcn--amdhsa " \
|
||||
"%:last_arg(%{march=*:-mcpu=%*}) " \
|
||||
"-mattr=%{mxnack:+xnack;:-xnack} " \
|
||||
/* FIXME: support "any" when we move to HSACOv4. */ \
|
||||
"-mattr=%{!msram-ecc=off:+sram-ecc;:-sram-ecc} " \
|
||||
"-filetype=obj"
|
||||
#define LINK_SPEC "--pie --export-dynamic"
|
||||
#define LIB_SPEC "-lc"
|
||||
|
@ -34,4 +34,11 @@ extern int gcn_isa;
|
||||
#define TARGET_GCN5 (gcn_isa == 5)
|
||||
#define TARGET_GCN5_PLUS (gcn_isa >= 5)
|
||||
|
||||
/* Settings accepted by the -msram-ecc= option; gcn.opt maps the strings
   "off", "on" and "any" onto these values, with SRAM_ECC_ANY as the
   initial value.  */
enum sram_ecc_type
|
||||
{
|
||||
SRAM_ECC_OFF,
|
||||
SRAM_ECC_ON,
|
||||
SRAM_ECC_ANY
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -703,6 +703,8 @@
|
||||
;; - The address space and glc (volatile) fields are there to replace the
|
||||
;; fields normally found in a MEM.
|
||||
;; - Multiple forms of address expression are supported, below.
|
||||
;;
|
||||
;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
|
||||
|
||||
(define_expand "gather_load<mode><vnsi>"
|
||||
[(match_operand:V_ALL 0 "register_operand")
|
||||
|
@ -143,6 +143,10 @@ gcn_option_override (void)
|
||||
/* 1MB total. */
|
||||
stack_size_opt = 1048576;
|
||||
}
|
||||
|
||||
/* The xnack option is a placeholder, for now. */
|
||||
if (flag_xnack)
|
||||
sorry ("XNACK support");
|
||||
}
|
||||
|
||||
/* }}} */
|
||||
@ -5035,11 +5039,16 @@ output_file_start (void)
|
||||
case PROCESSOR_FIJI: cpu = "gfx803"; break;
|
||||
case PROCESSOR_VEGA10: cpu = "gfx900"; break;
|
||||
case PROCESSOR_VEGA20: cpu = "gfx906"; break;
|
||||
case PROCESSOR_GFX908: cpu = "gfx908+sram-ecc"; break;
|
||||
case PROCESSOR_GFX908: cpu = "gfx908"; break;
|
||||
default: gcc_unreachable ();
|
||||
}
|
||||
|
||||
fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s\"\n", cpu);
|
||||
const char *xnack = (flag_xnack ? "+xnack" : "");
|
||||
/* FIXME: support "any" when we move to HSACOv4. */
|
||||
const char *sram_ecc = (flag_sram_ecc ? "+sram-ecc" : "");
|
||||
|
||||
fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s%s%s\"\n",
|
||||
cpu, xnack, sram_ecc);
|
||||
}
|
||||
|
||||
/* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h.
|
||||
|
@ -567,6 +567,7 @@
|
||||
(set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])
|
||||
|
||||
; 8/16bit move pattern
|
||||
; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on
|
||||
|
||||
(define_insn "*mov<mode>_insn"
|
||||
[(set (match_operand:QIHI 0 "nonimmediate_operand"
|
||||
|
@ -76,3 +76,24 @@ Target RejectNegative Joined UInteger Var(stack_size_opt) Init(-1)
|
||||
Wopenacc-dims
|
||||
Target Var(warn_openacc_dims) Warning
|
||||
Warn about invalid OpenACC dimensions.
|
||||
|
||||
mxnack
|
||||
Target Var(flag_xnack) Init(0)
|
||||
Compile for devices requiring XNACK enabled. Default off.
|
||||
|
||||
Enum
|
||||
Name(sram_ecc_type) Type(enum sram_ecc_type)
|
||||
SRAM-ECC modes:
|
||||
|
||||
EnumValue
|
||||
Enum(sram_ecc_type) String(off) Value(SRAM_ECC_OFF)
|
||||
|
||||
EnumValue
|
||||
Enum(sram_ecc_type) String(on) Value(SRAM_ECC_ON)
|
||||
|
||||
EnumValue
|
||||
Enum(sram_ecc_type) String(any) Value(SRAM_ECC_ANY)
|
||||
|
||||
msram-ecc=
|
||||
Target RejectNegative Joined ToLower Enum(sram_ecc_type) Var(flag_sram_ecc) Init(SRAM_ECC_ANY)
|
||||
Compile for devices with the SRAM ECC feature enabled, or not. Default \"any\".
|
||||
|
@ -52,7 +52,10 @@
|
||||
#undef EF_AMDGPU_MACH_AMDGCN_GFX906
|
||||
#define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
|
||||
#undef EF_AMDGPU_MACH_AMDGCN_GFX908
|
||||
#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x230 // Assume SRAM-ECC enabled.
|
||||
#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30
|
||||
|
||||
#define EF_AMDGPU_XNACK 0x100
|
||||
#define EF_AMDGPU_SRAM_ECC 0x200
|
||||
|
||||
#ifndef R_AMDGPU_NONE
|
||||
#define R_AMDGPU_NONE 0
|
||||
@ -77,6 +80,7 @@ static struct obstack files_to_cleanup;
|
||||
|
||||
enum offload_abi offload_abi = OFFLOAD_ABI_UNSET;
|
||||
uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; // Default GPU architecture.
|
||||
uint32_t elf_flags = 0;
|
||||
|
||||
/* Delete tempfiles. */
|
||||
|
||||
@ -298,7 +302,7 @@ copy_early_debug_info (const char *infile, const char *outfile)
|
||||
ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA;
|
||||
ehdr.e_type = ET_REL;
|
||||
ehdr.e_machine = EM_AMDGPU;
|
||||
ehdr.e_flags = elf_arch;
|
||||
ehdr.e_flags = elf_arch | elf_flags;
|
||||
|
||||
/* Load the section headers so we can walk them later. */
|
||||
Elf64_Shdr *sections = (Elf64_Shdr *)xmalloc (sizeof (Elf64_Shdr)
|
||||
@ -823,6 +827,7 @@ main (int argc, char **argv)
|
||||
bool fopenacc = false;
|
||||
bool fPIC = false;
|
||||
bool fpic = false;
|
||||
bool sram_seen = false;
|
||||
for (int i = 1; i < argc; i++)
|
||||
{
|
||||
#define STR "-foffload-abi="
|
||||
@ -845,6 +850,26 @@ main (int argc, char **argv)
|
||||
fPIC = true;
|
||||
else if (strcmp (argv[i], "-fpic") == 0)
|
||||
fpic = true;
|
||||
else if (strcmp (argv[i], "-mxnack") == 0)
|
||||
elf_flags |= EF_AMDGPU_XNACK;
|
||||
else if (strcmp (argv[i], "-mno-xnack") == 0)
|
||||
elf_flags &= ~EF_AMDGPU_XNACK;
|
||||
else if (strcmp (argv[i], "-msram-ecc=on") == 0)
|
||||
{
|
||||
elf_flags |= EF_AMDGPU_SRAM_ECC;
|
||||
sram_seen = true;
|
||||
}
|
||||
else if (strcmp (argv[i], "-msram-ecc=any") == 0)
|
||||
{
|
||||
/* FIXME: change this when we move to HSACOv4. */
|
||||
elf_flags |= EF_AMDGPU_SRAM_ECC;
|
||||
sram_seen = true;
|
||||
}
|
||||
else if (strcmp (argv[i], "-msram-ecc=off") == 0)
|
||||
{
|
||||
elf_flags &= ~EF_AMDGPU_SRAM_ECC;
|
||||
sram_seen = true;
|
||||
}
|
||||
else if (strcmp (argv[i], "-save-temps") == 0)
|
||||
save_temps = true;
|
||||
else if (strcmp (argv[i], "-v") == 0)
|
||||
@ -865,6 +890,21 @@ main (int argc, char **argv)
|
||||
if (!(fopenacc ^ fopenmp))
|
||||
fatal_error (input_location, "either -fopenacc or -fopenmp must be set");
|
||||
|
||||
/* The SRAM-ECC feature defaults to "any" on GPUs where the feature is
|
||||
available. */
|
||||
if (!sram_seen)
|
||||
switch (elf_arch)
|
||||
{
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX803:
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX900:
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX906:
|
||||
break;
|
||||
default:
|
||||
/* FIXME: change this when we move to HSACOv4. */
|
||||
elf_flags |= EF_AMDGPU_SRAM_ECC;
|
||||
break;
|
||||
}
|
||||
|
||||
const char *abi;
|
||||
switch (offload_abi)
|
||||
{
|
||||
@ -892,6 +932,12 @@ main (int argc, char **argv)
|
||||
obstack_ptr_grow (&cc_argv_obstack, "-xlto");
|
||||
if (fopenmp)
|
||||
obstack_ptr_grow (&cc_argv_obstack, "-mgomp");
|
||||
obstack_ptr_grow (&cc_argv_obstack,
|
||||
(elf_flags & EF_AMDGPU_XNACK
|
||||
? "-mxnack" : "-mno-xnack"));
|
||||
obstack_ptr_grow (&cc_argv_obstack,
|
||||
(elf_flags & EF_AMDGPU_SRAM_ECC
|
||||
? "-msram-ecc=on" : "-msram-ecc=off"));
|
||||
|
||||
for (int ix = 1; ix != argc; ix++)
|
||||
{
|
||||
@ -993,6 +1039,14 @@ main (int argc, char **argv)
|
||||
}
|
||||
obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name);
|
||||
obstack_ptr_grow (&ld_argv_obstack, "-lgomp");
|
||||
obstack_ptr_grow (&ld_argv_obstack,
|
||||
(elf_flags & EF_AMDGPU_XNACK
|
||||
? "-mxnack" : "-mno-xnack"));
|
||||
obstack_ptr_grow (&ld_argv_obstack,
|
||||
(elf_flags & EF_AMDGPU_SRAM_ECC
|
||||
? "-msram-ecc=on" : "-msram-ecc=off"));
|
||||
if (verbose)
|
||||
obstack_ptr_grow (&ld_argv_obstack, "-v");
|
||||
|
||||
for (int i = 1; i < argc; i++)
|
||||
if (strncmp (argv[i], "-l", 2) == 0
|
||||
|
@ -18686,6 +18686,15 @@ Compile for GCN5 Vega 20 devices (gfx906).
|
||||
|
||||
@end table
|
||||
|
||||
@item -msram-ecc=on
|
||||
@itemx -msram-ecc=off
|
||||
@itemx -msram-ecc=any
|
||||
@opindex msram-ecc
|
||||
Compile binaries suitable for devices with the SRAM-ECC feature enabled,
|
||||
disabled, or either mode. This feature can be enabled per-process on some
|
||||
devices. The compiled code must match the device mode. The default is
|
||||
@samp{any}, for devices that support it.
|
||||
|
||||
@item -mstack-size=@var{bytes}
|
||||
@opindex mstack-size
|
||||
Specify how many @var{bytes} of stack space will be requested for each GPU
|
||||
@ -18694,6 +18703,14 @@ available. The size of the stack allocation may also have an impact on
|
||||
run-time performance. The default is 32KB when using OpenACC or OpenMP, and
|
||||
1MB otherwise.
|
||||
|
||||
@item -mxnack
|
||||
@opindex mxnack
|
||||
Compile binaries suitable for devices with the XNACK feature enabled. Some
|
||||
devices always require XNACK and some allow the user to configure XNACK. The
|
||||
compiled code must match the device mode. The default is @samp{-mno-xnack}.
|
||||
At present this option is a placeholder for support that is not yet
|
||||
implemented.
|
||||
|
||||
@end table
|
||||
|
||||
@node ARC Options
|
||||
|
17
gcc/testsuite/gcc.target/gcn/sram-ecc-1.c
Normal file
17
gcc/testsuite/gcc.target/gcn/sram-ecc-1.c
Normal file
@ -0,0 +1,17 @@
|
||||
/* Ensure that explicit zero-extend instructions are present when compiling
|
||||
for targets without sram-ecc enabled (in which sub-dword loads do not
|
||||
zero the high bits of the target register). */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msram-ecc=off" } */
|
||||
|
||||
extern unsigned char c;
|
||||
|
||||
/* Return the unsigned char global c converted to unsigned int; the
   dg-final checks below look for the shift pair in the output.  */
unsigned int
|
||||
f ()
|
||||
{
|
||||
return c;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "lshl.* 24" } } */
|
||||
/* { dg-final { scan-assembler "lshr.* 24" } } */
|
17
gcc/testsuite/gcc.target/gcn/sram-ecc-2.c
Normal file
17
gcc/testsuite/gcc.target/gcn/sram-ecc-2.c
Normal file
@ -0,0 +1,17 @@
|
||||
/* Ensure that explicit zero-extend instructions are present when compiling
|
||||
for targets without sram-ecc enabled (in which sub-dword loads do not
|
||||
zero the high bits of the target register). */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msram-ecc=off" } */
|
||||
|
||||
extern unsigned short s;
|
||||
|
||||
/* Load and return the unsigned short global s; the dg-final checks
   below look for the 16-bit shift pair in the output.  */
unsigned short
|
||||
f ()
|
||||
{
|
||||
return s;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "lshl.* 16" } } */
|
||||
/* { dg-final { scan-assembler "lshr.* 16" } } */
|
21
gcc/testsuite/gcc.target/gcn/sram-ecc-3.c
Normal file
21
gcc/testsuite/gcc.target/gcn/sram-ecc-3.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* Ensure that explicit zero-extend instructions are present when compiling
|
||||
for targets without sram-ecc enabled (in which sub-dword loads do not
|
||||
zero the high bits of the target register). */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=off" } */
|
||||
|
||||
typedef unsigned int v64si __attribute__ ((vector_size (64*4)));
|
||||
typedef unsigned char v64qi __attribute__ ((vector_size (64*1)));
|
||||
|
||||
extern v64si a;
|
||||
extern v64qi b;
|
||||
|
||||
/* Copy each of the 64 unsigned char elements of b into the matching
   unsigned int element of a.  */
void
|
||||
f ()
|
||||
{
|
||||
for (int n = 0; n < 64; n++)
|
||||
a[n] = b[n];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "zero_extendv64qiv64si2" } } */
|
21
gcc/testsuite/gcc.target/gcn/sram-ecc-4.c
Normal file
21
gcc/testsuite/gcc.target/gcn/sram-ecc-4.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* Ensure that explicit zero-extend instructions are present when compiling
|
||||
for targets without sram-ecc enabled (in which sub-dword loads do not
|
||||
zero the high bits of the target register). */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=off" } */
|
||||
|
||||
typedef unsigned int v64si __attribute__ ((vector_size (64*4)));
|
||||
typedef unsigned short v64hi __attribute__ ((vector_size (64*2)));
|
||||
|
||||
extern v64si a;
|
||||
extern v64hi b;
|
||||
|
||||
/* Copy each of the 64 unsigned short elements of b into the matching
   unsigned int element of a.  */
void
|
||||
f ()
|
||||
{
|
||||
for (int n = 0; n < 64; n++)
|
||||
a[n] = b[n];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "zero_extendv64hiv64si2" } } */
|
17
gcc/testsuite/gcc.target/gcn/sram-ecc-5.c
Normal file
17
gcc/testsuite/gcc.target/gcn/sram-ecc-5.c
Normal file
@ -0,0 +1,17 @@
|
||||
/* Ensure that explicit zero-extend instructions are present when compiling
|
||||
for targets that may not have sram-ecc enabled (in which sub-dword loads do
|
||||
not zero the high bits of the target register). */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msram-ecc=any" } */
|
||||
|
||||
extern unsigned char c;
|
||||
|
||||
/* Return the unsigned char global c converted to unsigned int; the
   dg-final checks below look for the shift pair in the output.  */
unsigned int
|
||||
f ()
|
||||
{
|
||||
return c;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "lshl.* 24" } } */
|
||||
/* { dg-final { scan-assembler "lshr.* 24" } } */
|
17
gcc/testsuite/gcc.target/gcn/sram-ecc-6.c
Normal file
17
gcc/testsuite/gcc.target/gcn/sram-ecc-6.c
Normal file
@ -0,0 +1,17 @@
|
||||
/* Ensure that explicit zero-extend instructions are present when compiling
|
||||
for targets that may not have sram-ecc enabled (in which sub-dword loads do
|
||||
not zero the high bits of the target register). */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msram-ecc=any" } */
|
||||
|
||||
extern unsigned short s;
|
||||
|
||||
/* Load and return the unsigned short global s; the dg-final checks
   below look for the 16-bit shift pair in the output.  */
unsigned short
|
||||
f ()
|
||||
{
|
||||
return s;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "lshl.* 16" } } */
|
||||
/* { dg-final { scan-assembler "lshr.* 16" } } */
|
21
gcc/testsuite/gcc.target/gcn/sram-ecc-7.c
Normal file
21
gcc/testsuite/gcc.target/gcn/sram-ecc-7.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* Ensure that explicit zero-extend instructions are present when compiling
|
||||
for targets that may not have sram-ecc enabled (in which sub-dword loads do
|
||||
not zero the high bits of the target register). */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=any" } */
|
||||
|
||||
typedef unsigned int v64si __attribute__ ((vector_size (64*4)));
|
||||
typedef unsigned char v64qi __attribute__ ((vector_size (64*1)));
|
||||
|
||||
extern v64si a;
|
||||
extern v64qi b;
|
||||
|
||||
/* Copy each of the 64 unsigned char elements of b into the matching
   unsigned int element of a.  */
void
|
||||
f ()
|
||||
{
|
||||
for (int n = 0; n < 64; n++)
|
||||
a[n] = b[n];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "zero_extendv64qiv64si2" } } */
|
21
gcc/testsuite/gcc.target/gcn/sram-ecc-8.c
Normal file
21
gcc/testsuite/gcc.target/gcn/sram-ecc-8.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* Ensure that explicit zero-extend instructions are present when compiling
|
||||
for targets that may not have sram-ecc enabled (in which sub-dword loads do
|
||||
not zero the high bits of the target register). */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=any" } */
|
||||
|
||||
typedef unsigned int v64si __attribute__ ((vector_size (64*4)));
|
||||
typedef unsigned short v64hi __attribute__ ((vector_size (64*2)));
|
||||
|
||||
extern v64si a;
|
||||
extern v64hi b;
|
||||
|
||||
/* Copy each of the 64 unsigned short elements of b into the matching
   unsigned int element of a.  */
void
|
||||
f ()
|
||||
{
|
||||
for (int n = 0; n < 64; n++)
|
||||
a[n] = b[n];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "zero_extendv64hiv64si2" } } */
|
Loading…
Reference in New Issue
Block a user